### get tab names
xlsx_file_path <- file.path(input_dir,"raw_datasets.xlsx")
tab_names<-openxlsx::getSheetNames(xlsx_file_path)
### list with separate element for each tab
list_of_datasets <- lapply(tab_names,
openxlsx::read.xlsx,xlsxFile=xlsx_file_path)
names(list_of_datasets) <- tab_names
### open list for data plotted in each figure panel
list_of_diff_counts <- list()
list_of_stats <- list()
##### set prior for log2 (0 for MSD or Fig 1/2, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["Figure1BDEF"]] %>%
dplyr::filter(Fig %in% c("figure 1B upper panel","figure 1B bottom panel")) %>%
### add log10
dplyr::mutate(log10_value = log10(value_raw + prior_to_use))
colors_df <- data.frame("panel"=c("figure 1B upper panel","figure 1B bottom panel"),
"color"=c("darkred","grey20"))
### To name output files
contrast_name <- "Fig1B"
num_columns_for_plot <- 1
list_of_plots_mean <- list()
## for each analyte of interest
for(i in unique(data_to_use$Fig)){
phen_to_plot <- data_to_use %>%
dplyr::filter(Fig == i) %>%
## make timepoint for plotting
dplyr::mutate(TimePoint = factor(TimePoint,unique(dplyr::filter(data_to_use, Fig == i)$TimePoint)))
curr_color <- colors_df$color[which(colors_df$panel == i)]
#### base plot
p <- ggplot(phen_to_plot, aes(x = TimePoint, y = log10_value, group = MacaqueID)) +
geom_line(color = curr_color, linewidth = 0.3)+
scale_x_discrete(labels = unique(phen_to_plot$TimePoint) %>%
str_replace("w",", w") %>%
str_remove_all("k"))+
#scale_y_continuous(transform = "log10")+
labs(title = i)+
ylab("log10(value)")+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
p_mean <- p +
stat_summary(aes(group = 1, y = log10_value), fun=mean, geom="line", colour=curr_color, linewidth = 1)+
stat_summary(aes(group = 1, y = log10_value), fun=mean, geom="point", colour=curr_color)
# plot(p)
list_of_plots_mean[[i]] <- p_mean
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots_mean, ncol=num_columns_for_plot))
cat("\n")
cat(' \n \n') ### this is the key!
##### set prior for log2 (0 for MSD or Fig 1/2, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["Figure1BDEF"]] %>%
dplyr::filter(Fig == c("figure1D")) %>%
### add log10
dplyr::mutate(log10_value = log10(value_raw + prior_to_use),
TimePoint = as.numeric(TimePoint))
#### set y axis limits
y_axis_limits <- c(1,6)
shapes_to_use <- c(24,21,4,22,25)
### To name output files
contrast_name <- "Fig1D"
num_columns_for_plot <- 1
p <- ggplot(data_to_use, aes(x = TimePoint, y = log10_value, group = MacaqueID)) +
geom_line(color = "darkblue", linewidth = 0.3)+
geom_point(aes(shape = MacaqueID), color = "darkblue", fill="darkblue")+
scale_x_continuous(limits = c(0,120), breaks = c(seq(0,28,4),seq(40,120,20)))+
scale_y_continuous(limits = y_axis_limits)+
scale_shape_manual(values = shapes_to_use)+
labs(title = unique(data_to_use$Fig))+
xlab("weeks of study")+
ylab("log10(value)")+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"))
plot(p)
##### set prior for log2 (0 for MSD or Fig 1/2, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["Figure1BDEF"]] %>%
dplyr::filter(Fig == c("figure1E")) %>%
### add log10
dplyr::mutate(log10_value = value_raw, ### already logged
TimePoint = as.numeric(TimePoint))
#### set y axis limits
y_axis_limits <- c(0.5,5)
shapes_to_use <- c(24,21,22,25)
### To name output files
contrast_name <- "Fig1E"
num_columns_for_plot <- 1
p <- ggplot(data_to_use, aes(x = TimePoint, y = log10_value, group = MacaqueID)) +
geom_line(color = "darkblue", linewidth = 0.3)+
geom_point(aes(shape = MacaqueID), color = "darkblue", fill="darkblue")+
scale_x_continuous(limits = c(0,120), breaks = c(seq(0,28,4),seq(40,120,20)))+
scale_y_continuous(limits = y_axis_limits)+
scale_shape_manual(values = shapes_to_use)+
labs(title = unique(data_to_use$Fig))+
xlab("weeks of study")+
ylab("log10(value)")+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"))
plot(p)
##### set prior for log2 (0 for MSD or Fig 1/2, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["Figure1BDEF"]] %>%
dplyr::filter(Fig == c("figure1F")) %>%
### add log10
dplyr::mutate(log10_value = log10(value_raw + prior_to_use))
plot_sig_only <- "no"
### To name output files
contrast_name <- "Fig1F"
num_columns_for_plot <- 1
phen_to_plot <- data_to_use %>%
dplyr::mutate(TimePoint = factor(TimePoint, c("V2w2L", #DNA/LION
"V2w2D", #DNA(IM)
"V2w2EP",#DNA(EP)
"V2w2DP" #DNA(EP)+protein
)))
#### calculate MannWhitney between groups(i.e. TimePoint)
curr_test <- compare_means(log10_value ~ TimePoint, data = phen_to_plot, method = "wilcox.test")
list_of_stats[[paste0(contrast_name)]] <- curr_test
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$log10_value, na.rm=TRUE)
min_y <- min(phen_to_plot$log10_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(curr_test),
length(which(curr_test[,"p"] < 0.05))
)
maxy_y_to_plot <- max_y*(1+0.05*(num_contrasts+1))
color_df <- data.frame("TimePoint"=levels(phen_to_plot$TimePoint),
"color"=c("darkred","grey50","black","black"),
"shape"=c(16,16,2,6))
#### base plot
p <- ggplot(phen_to_plot, aes(x = TimePoint, y = log10_value)) +
scale_color_manual(values=color_df$color) +
scale_shape_manual(values=color_df$shape) +
geom_beeswarm(aes(color = TimePoint, shape = TimePoint), size = 3, cex = 3)+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
## add line with mean
stat_summary(aes(group = 1, y = log10_value), fun=mean, geom="crossbar", colour="grey30", linetype = "11", size = 0.3)+
labs(x = "group")+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"))
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(curr_test[l,], label = "p.format",size = 4,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- curr_test[which(curr_test[,"p"] < 0.05),]
for(l in 1:nrow(sig_pvals)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p.format",size = 4,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
}
}else{
p <- p
}
}
plot(p)
#### this data is already log10
##### select data
data_to_use <- list_of_datasets[["Figure1G"]] %>%
### pivot_wider to plot
tidyr::pivot_wider(names_from = "Antibody", values_from = "value_raw") %>%
### add treatment group
dplyr::left_join(list_of_datasets[["Figure1BDEF"]] %>%
dplyr::filter(Fig == c("figure1F")) %>%
### add useful treatment
dplyr::mutate(Treatment = ifelse(grepl("L",TimePoint),"DNA_LION",
ifelse(endsWith(TimePoint,"2D"),"DNA","non"))) %>%
dplyr::select(MacaqueID,Treatment) %>%
dplyr::filter(Treatment != "non") %>%
unique(),
by = "MacaqueID"
)
### To name output files
contrast_name <- "Fig1G"
num_columns_for_plot <- 1
df_for_plot <- data_to_use %>%
## remove unneeded columns
dplyr::select(-c(Fig,Treatment)) %>%
column_to_rownames("MacaqueID")
### annotation for columns
annotation_col1 <- data.frame("Antibody_time"=colnames(df_for_plot)) %>%
## break up Antibody and timepoint
dplyr::mutate(Antibody = str_before_first(Antibody_time,"_"),
Antibody = ifelse(Antibody == "D614G",Antibody_time,Antibody), ## makes unique colnames
Time = str_after_first(Antibody_time,"_"))
# set colnames for df_for_plot
colnames(df_for_plot) <- annotation_col1$Antibody
# trim col annotation
annotation_col <- annotation_col1 %>%
dplyr::select(-Antibody_time) %>%
column_to_rownames("Antibody")
### annotation for rows
annotation_row <- data_to_use %>%
dplyr::select(c(MacaqueID,Treatment)) %>%
column_to_rownames("MacaqueID")
### color scale
mycols <- colorRampPalette(c("#EBEBEB","#0099D5"))(1000)
breakscale <- seq(0,5, length=1000)
p <- pheatmap(df_for_plot, scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
#gaps_col = gaps_to_use,
border_color = "black",
cluster_rows = FALSE, cluster_cols = FALSE,
annotation_col = annotation_col,
annotation_row = annotation_row,
display_numbers = round(df_for_plot,1)
# annotation_colors = annotation_colors
)
p
##### set prior for log2 (0 for MSD or Fig 1/2, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["Figure2ABDE"]] %>%
dplyr::filter(Fig %in% c("Fig2A top panel","Fig2 A bottom panel")) %>%
### values are already log10
dplyr::mutate(log10_value = value_raw,
group = ifelse(grepl("top",Fig),"nofill",
ifelse(grepl(paste(c("V3","V4"),collapse = '|'),TimePoint),"fill","nofill"))) %>%
## make timepoint for plotting
dplyr::mutate(TimePoint = factor(TimePoint,
unique(dplyr::filter(list_of_datasets[["Figure2ABDE"]],
Fig %in% c("Fig2A top panel","Fig2 A bottom panel"))$TimePoint)))
colors_df <- data.frame("panel"=c("Fig2A top panel","Fig2 A bottom panel"),
"color"=c("darkred","grey20"))
fill_df <- data.frame("group"=c("nofill","fill"),
"fill"=c("white","#FDC5BC")) %>%
column_to_rownames("group")
y_limits <-c(0,3)
y_breaks <- c(seq(0,0.9,0.3),seq(1,3,1))
x_labels <- levels(data_to_use$TimePoint) %>%
str_replace("w",", w") %>%
str_remove_all("k")
x_limits <- levels(data_to_use$TimePoint)
### To name output files
contrast_name <- "Fig2A"
num_columns_for_plot <- 1
list_of_plots <- list()
## for each analyte of interest
for(i in unique(data_to_use$Fig)){
phen_to_plot <- data_to_use %>%
dplyr::filter(Fig == i)
curr_color <- colors_df$color[which(colors_df$panel == i)]
#### base plot
p <- ggplot(phen_to_plot, aes(x = TimePoint, y = log10_value)) +
## addboxplot
geom_boxplot(aes(fill = group),color = curr_color)+
geom_beeswarm(color = curr_color, corral.width = 0.8)+
scale_fill_manual(values = c("nofill"=fill_df["nofill","fill"],"fill"=fill_df["fill","fill"]))+
scale_x_discrete(limits = factor(x_limits),
labels = x_labels)+
scale_y_continuous(limits = y_limits,
breaks = y_breaks)+
labs(title = i)+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
cat("\n")
cat(' \n \n') ### this is the key!
##### set prior for log2 (0 for MSD or Fig 1/2, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["Figure2ABDE"]] %>%
dplyr::filter(Fig == c("Fig2B")) %>%
### values are already log10
dplyr::mutate(log10_value = value_raw,
group = ifelse(grepl("top",Fig),"nofill","fill"),
### to add spaces in x axis
TimePoint1 = TimePoint,
TimePoint2 = factor(TimePoint, unique(dplyr::filter(list_of_datasets[["Figure2ABDE"]],Fig == c("Fig2B"))$TimePoint)),
TimePoint = factor(ifelse(TimePoint1 == "V2_wk2",as.numeric(TimePoint2)+2,
ifelse(TimePoint1 == "V2_w10",as.numeric(TimePoint2)+3,
ifelse(TimePoint1 == "V2_w42",as.numeric(TimePoint2)+5,
ifelse(TimePoint1 == "V2_w78",as.numeric(TimePoint2)+8,
ifelse(TimePoint1 == "V2_w90",as.numeric(TimePoint2)+11,
as.numeric(TimePoint2)+13))))),seq(0,19,1))
)
colors_df <- data.frame("panel"=c("Fig2B"),
"color"=c("darkblue"))
fill_df <- data.frame("group"=c("nofill","fill"),
"fill"=c("white","#B4C0FF")) %>%
column_to_rownames("group")
y_limits <-c(0,0.5)
y_breaks <- c(seq(0,0.5,0.1))
### breaks are contigous points
x_limits <- seq(0,19,1)
###
x_labels_df <- data.frame("TimePoint"=seq(0,19,1)) %>%
dplyr::mutate(TimePoint = factor(TimePoint,seq(0,19,1))) %>%
## add in actual labels
dplyr::left_join(unique(data_to_use[,c("TimePoint1","TimePoint")]), by = "TimePoint")
x_labels <- x_labels_df$TimePoint1 %>%
str_after_first("_")
x_labels[is.na(x_labels)] <- ""
### To name output files
contrast_name <- "Fig2B"
num_columns_for_plot <- 1
list_of_plots <- list()
## for each analyte of interest
for(i in unique(data_to_use$Fig)){
phen_to_plot <- data_to_use %>%
dplyr::filter(Fig == i)
curr_color <- colors_df$color[which(colors_df$panel == i)]
#### base plot
p <- ggplot(phen_to_plot, aes(x = TimePoint, y = log10_value)) +
## addboxplot
geom_boxplot(aes(fill = group),color = curr_color)+
geom_beeswarm(color = curr_color, corral.width = 0.8)+
scale_fill_manual(values = c("nofill"=fill_df["nofill","fill"],"fill"=fill_df["fill","fill"]))+
scale_x_discrete(limits = factor(x_limits),
labels = x_labels)+
scale_y_continuous(limits = y_limits,
breaks = y_breaks)+
labs(title = i)+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
cat("\n")
cat(' \n \n') ### this is the key!
##### select data
data_to_use <- list_of_datasets[["Figure2C"]] %>%
### add treatment group
dplyr::left_join(list_of_datasets[["Figure1BDEF"]] %>%
dplyr::filter(Fig == c("figure1F")) %>%
### add useful treatment
dplyr::mutate(Treatment = ifelse(grepl("L",TimePoint),"DNA_LION",
ifelse(endsWith(TimePoint,"2D"),"DNA_IM",
ifelse(grepl("EP",TimePoint),"DNA_EP","DNA_EPplus")))) %>%
dplyr::select(MacaqueID,Treatment) %>%
unique(),
by = "MacaqueID"
)
### To name output files
contrast_name <- "Fig2C"
num_columns_for_plot <- 1
list_of_plots <- list()
## for each analyte of interest
for(i in unique(data_to_use$Fig)){
phen_to_plot <- data_to_use %>%
dplyr::filter(Fig == i) %>%
dplyr::mutate(Treatment = factor(Treatment, c("DNA_LION","DNA_IM","DNA_EP","DNA_EPplus")),
CellType = factor(CellType, c("CD8","CD4")))
#### base plot
p <- ggplot(phen_to_plot, aes(x = MacaqueID, y = value_raw, fill = CellType)) +
scale_fill_manual(values = c("black","white"))+
geom_bar(position="stack", stat="identity", color = "black")+
facet_wrap(~Treatment,
strip.position = "bottom", ## treatment lables are at the bottom
scales = "free_x", ## don't force the same x ticks in all treatments
nrow = 1) +
scale_y_continuous(limits = c(0,3),
breaks = c(0,0.3,0.6,0.9,2,3))+
theme_bw()+
theme(strip.placement = "outside",
strip.background = element_rect(fill = NA, color = "white"),
panel.spacing = unit(-.01,"cm"),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
cat("\n")
cat(' \n \n') ### this is the key!
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- c("Fig2D","Fig2E")
##### select data
data_to_use <- list_of_datasets[["Figure2ABDE"]] %>%
### filter to only vars of interes
dplyr::filter(grepl(paste(curr_figure_name, collapse = '|'),Fig)) %>%
dplyr::mutate(Fig = str_replace_all(Fig, " ","_"))
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::mutate(log2_value = log2(value_raw + as.double(prior_to_use)),
value_raw = NULL
) %>%
dplyr::rename(variable = Fig)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("w2","w3","w3"),
"time2"=c("V4","V4","w2")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
timepoints_x_order <- c("V4","w2","w3")
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig2DE"
#contrast_name <- "Fig5D_Myeloid_vsD1"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 2
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
list_of_pvals <- list()
## for each analyte of interest
for(i in unique(data_to_use$Fig)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each analyte of interest
for(i in unique(data_to_use$Fig)){
phen_to_plot <- data_to_use %>%
dplyr::filter(Fig == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$value_raw, na.rm=TRUE)
min_y <- min(phen_to_plot$value_raw, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = TimePoint, y = value_raw)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_beeswarm(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
### reorder list
list_of_plots_ordered <- list()
list_of_plots_ordered[[1]] <- list_of_plots[[1]]
list_of_plots_ordered[[2]] <- list_of_plots[[3]]
list_of_plots_ordered[[3]] <- list_of_plots[[2]]
list_of_plots_ordered[[4]] <- list_of_plots[[4]]
do.call("grid.arrange",c(grobs = list_of_plots_ordered, ncol=num_columns_for_plot))
##### select data
data_to_use <- list_of_datasets[["Figure2F"]] %>%
### calculate averages
group_by(Fig,Markers,TimePoint) %>%
dplyr::summarise(mean = mean(value_raw, na.rm = TRUE))
### To name output files
contrast_name <- "Fig2F"
num_columns_for_plot <- 1
list_of_plots <- list()
## for each analyte of interest
for(i in rev(sort(unique(data_to_use$Fig)))){
phen_to_plot <- data_to_use %>%
dplyr::filter(Fig == i) %>%
dplyr::mutate(Markers = factor(Markers, c(
"EOMES+ T-bet-",
"EOMES- T-bet-",
"EOMES+ T-bet+",
"EOMES- T-bet+")),
TimePoint = factor(TimePoint, c("V4","V4w2","V4w3")))
#### base plot
p <- ggplot(phen_to_plot, aes(x = TimePoint, y = mean, fill = Markers)) +
scale_fill_manual(values = c("#919191","#A9A9A9","#00849D","#7AE4EF"))+
geom_bar(position="stack", stat="identity", color = "black")+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"))
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
cat("\n")
cat(' \n \n') ### this is the key!
MSD for DNA/LION Vaccination V1 D1+4hr & D1+24hr
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["MSD"]] %>%
### filter to only V1 D1, D1_1hr, D2
dplyr::filter(vaccine == "DNA_LION" &
vax_dose == "V1" &
Day %in% c("D1","D1_4h","D2")
)
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,vaccine,vax_dose,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_last(sampleName,"__"),
sampleName = NULL
)
# id_var_of_interest <- "sampleName"
# var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
# ## designate covariates with Day related first, then id
# covariates=c("Day","MacaqueID")
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("V1_D1_4h","V1_D2"),
"time2"=c("V1_D1","V1_D1")
)
## names for panels (one per row of TimePoints_to_compare)
panel_names <- c("+4hr vs D1 (top)","+24hr vs D1 (bottom)")
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig3A_MSD_V1"
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
NOTE some p values equal to zero, set to 10^{-18} in stats table for plotting
#### first updata the stats to have 1e-18 for zeros
p_value_df2 <- p_value_df2 %>%
dplyr::mutate(`V1_D1_4h-V1_D1_pval` = ifelse(`V1_D1_4h-V1_D1_pval` == 0,
as.double(params$prior_for_volcano_p),
`V1_D1_4h-V1_D1_pval`),
`V1_D2-V1_D1_pval` = ifelse(`V1_D2-V1_D1_pval` == 0,
as.double(params$prior_for_volcano_p),
`V1_D2-V1_D1_pval`))
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
for(i in seq_along(curr_contrasts)){
c <- curr_contrasts[i]
label.col <- "rowname"
sig.col <- paste0(c,raw_or_adj)
lfc.col <-paste0(c,"_log2FC")
columns_of_interest <- c(label.col,lfc.col,sig.col)
res.sub <- p_value_df2 %>% dplyr::select(one_of(columns_of_interest)) %>% dplyr::filter(!is.na(!!rlang::sym(lfc.col)))
change_lfc_name <- "log2FC"
if(raw_or_adj == "_pval"){
change_sig_name <- "raw-p"
}else{
change_sig_name <- "FDR"
}
colnames(res.sub) <- c(label.col,change_lfc_name,change_sig_name)
cat(paste0("#### ",panel_names[i],"\n"))
#Select top genes by logFC or Significance
no_genes_to_label <- 30
value_to_sort_the_output_dataset <- "p-value"
if (value_to_sort_the_output_dataset=="fold-change") {
res.sub %>% dplyr::arrange(desc(abs(!!rlang::sym(change_lfc_name)))) -> res.sub
} else if (value_to_sort_the_output_dataset=="p-value") {
res.sub %>% dplyr::arrange(!!rlang::sym(change_sig_name)) -> res.sub
}
genes_to_label <- as.character(res.sub[1:no_genes_to_label,label.col])
additional_labels <- ""
additional_labels <- unlist(str_split(additional_labels,","))
filter <- additional_labels %in% res.sub[,label.col]
additional_labels <- additional_labels[filter]
missing_labels <- additional_labels[!filter]
if(!is.na(missing_labels)){
cat("Could not find:\n")
print(missing_labels)
}
use_only_addition_labels <- FALSE
if(use_only_addition_labels){
genes_to_label <- additional_labels
}else{
genes_to_label <- unique(append(genes_to_label, additional_labels))
}
pCutoff = 0.05
FCcutoff = 1.0
significant=as.vector(table(abs( res.sub[,change_lfc_name] ) > FCcutoff &
res.sub[,change_sig_name] < pCutoff))[2]
# fix pvalue == 0
shapeCustom <- rep(19,nrow(res.sub))
maxy <- max(-log10(res.sub[[change_sig_name]]), na.rm=TRUE)
if(0 > 0){
maxy <- 0
}
#cat(paste0("Maxy: ",maxy,"\n"))
if(maxy == Inf){
# Sometimes, pvalues == 0
keep <- res.sub[[change_sig_name]] > 0
res.sub[[change_sig_name]][!keep] <- min(res.sub[[change_sig_name]][keep])
shapeCustom[!keep] <- 17
maxy <- -log10(min(res.sub[[change_sig_name]][keep]))
cat("Some p-values equal zero. Adjusting y-limits.\n")
cat(paste0("Maxy adjusted: ",maxy,"\n"))
}
# By default, nothing will be greater than maxy. User can set this value lower
keep <- -log10(res.sub[[change_sig_name]]) <= maxy
res.sub[[change_sig_name]][!keep] <- maxy
shapeCustom[!keep] <- 17
names(shapeCustom)<- rep("Exact",length(shapeCustom))
names(shapeCustom)[shapeCustom == 17] <- "Adjusted"
#Remove if nothin' doin'
if(all(shapeCustom == 19)){
shapeCustom <- NULL
}
maxy <- ceiling(maxy)
use_custom_xlab <- FALSE
if(use_custom_xlab){
xlab <- gsub("_"," ",change_lfc_name)
}else{
xlab <- bquote(~Log[2]~ "fold change")
}
xlim_additional <- 0
ylim_additional <- 0
axisLabSize <- 12
legendSize <- 10
labSize <- 3
imageWidth = 3000
imageHeight = 3000
dpi = 300
####### enhancedvolcano became unusable 9/28/23
### add column for signif
res.sub <- res.sub %>%
mutate(Significant = ifelse(.data[[change_sig_name]] < pCutoff,
ifelse(abs(.data[[change_lfc_name]]) > FCcutoff, "p-value and log2FC","p-value"),
ifelse(abs(.data[[change_lfc_name]]) > FCcutoff, "log2FC","NS")),
Sig_f = factor(Significant,c("NS","log2FC","p-value","p-value and log2FC")))
color_df <- data.frame("category"=c("NS","log2FC","p-value","p-value and log2FC"),
"colors"=c("grey60", "forestgreen", "royalblue","red"))
colors_to_use <- color_df$colors[which(color_df$category %in% unique(res.sub$Sig_f))]
p <- ggplot(res.sub,aes(.data[[change_lfc_name]],-log10(.data[[change_sig_name]]),color=Sig_f, fill=Sig_f, label=.data[[label.col]])) +
geom_point(size = 1, shape = 21, alpha = 0.8) +
theme_bw() +
geom_text_repel(max.overlaps = 20, size = labSize)+
geom_hline(yintercept = -log10(pCutoff),linetype="dashed") +
geom_vline(xintercept = c(-FCcutoff,FCcutoff),linetype="dashed") +
#scale_x_continuous(limits = c(-15,15))+
scale_color_manual(values=colors_to_use)+
scale_fill_manual(values=colors_to_use)+
theme(legend.position="top", axis.title = element_text(size=axisLabSize), axis.text = element_text(size=axisLabSize),
legend.title = element_blank(), legend.text = element_text(size=legendSize),
panel.border = element_blank(), axis.line = element_line(colour = "black", size = 0.5),
axis.ticks = element_line(size = 0.5), axis.ticks.length = unit(.25,"cm"),
plot.title = element_text(size = axisLabSize, face = "bold"),
plot.caption = element_text(size = legendSize)) +
scale_size(guide = 'none')+ ### remove the size from legend
guides(color = guide_legend(override.aes = list(size=1)),
fill = guide_legend(override.aes = aes(label = "")))+
labs(title = paste0("(Significant=",length(which(res.sub$Significant == "p-value and log2FC")),")"),
caption = paste0("total = ", dim(res.sub)[1], " variables"))
print(p)
cat("\n")
cat(' \n \n') ### this is the key!
}
cat("\n")
cat(' \n \n') ### this is the key!
MSD for DNA/LION Vaccination V4 x D2,D3,D4,D8,D15,D22 vs D1
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["MSD"]] %>%
### filter to only V1 D1, D1_1hr, D2
dplyr::filter(vaccine == "DNA_LION" &
vax_dose == "V4" &
Day %in% c("D1","D2","D3","D4","D8","D15","D22")
)
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,vaccine,vax_dose,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_last(sampleName,"__"),
sampleName = NULL
)
# id_var_of_interest <- "sampleName"
# var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
# ## designate covariates with Day related first, then id
# covariates=c("Day","MacaqueID")
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("V4_D2","V4_D3","V4_D4","V4_D8","V4_D15","V4_D22"),
"time2"=c("V4_D1","V4_D1","V4_D1","V4_D1","V4_D1","V4_D1")
)
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig3B_MSD_V4"
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
### read in degs results
degs_df <- list_of_stats[["Fig3B_MSD_V4"]]
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(str_remove_all(colnames(.),"-V4_D1_log2FC"),"V4_")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
timepoint_factor_order <- c("D2","D3","D4","D8","D15","D22")
############### annotation colors (set number of dirs)
annotation_colors <- list(Day = c(D2="grey20",D3="grey40",D4="grey60",D8="grey80",D15="grey90",D22="grey100"),
dir1 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir2 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir3 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir4 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir5 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir6 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"))
names(annotation_colors[["Day"]]) <- timepoint_factor_order
names(annotation_colors) <- c("Day",paste0(timepoint_factor_order,"vsD1_dir"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
mutate(Day=factor(contrast, timepoint_factor_order)) %>%
arrange(Day) %>%
column_to_rownames("contrast")
############## add row annotation with sig change for FC
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
annotation_row1 <- data.frame("rowname"=degs_df$rowname)
for(curr_contrast in rev(curr_contrasts)){
### trim to curr contrast
curr_pval_df <- degs_df[,c("rowname",colnames(degs_df)[grepl(curr_contrast,colnames(degs_df))])]
### rename column names to be generic
orig_names <- colnames(curr_pval_df)
simp_names <- ifelse(grepl("_",orig_names),str_after_last(orig_names,"_"),orig_names)
colnames(curr_pval_df) <- simp_names
curr_pval_df <- curr_pval_df %>%
dplyr::mutate(dir = ifelse(pval < as.double(params$pCutoff),
ifelse(log2FC > as.double(params$FCcutoff),"up", ifelse(log2FC > 0, "up_p_only",
ifelse(log2FC < (-1)*as.double(params$FCcutoff),"down",ifelse(log2FC < 0, "down_p_only","p_only")))),"not_sig"))
curr_df <- curr_pval_df[,c("rowname","dir")] %>%
dplyr::mutate(dir = factor(dir, c("up","up_p_only","not_sig","down_p_only","down")))
colnames(curr_df) <- c("rowname",paste0(str_remove_all(str_replace_all(curr_contrast,"-","vs"),"V4_"),"_dir"))
annotation_row1 <- dplyr::left_join(annotation_row1,curr_df, by = "rowname")
}
#### make rownames
annotation_row <- column_to_rownames(annotation_row1)
### sort
curr_ann_row <- dplyr::arrange(annotation_row[rownames(degs_df_for_hm),], annotation_row[rownames(degs_df_for_hm),rev(colnames(annotation_row))])
mycols = diverging_hcl(n=100, palette="Blue-Red 3")
scale_range = 2
breakscale <- seq(-1*scale_range, scale_range, length=100)
p <- pheatmap(degs_df_for_hm[rownames(curr_ann_row),rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
#gaps_col = gaps_to_use,
border_color = NA,
cluster_rows = FALSE, cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
MSD for DNA/LION Vaccination V4 D2vsD1, Spearman association among analyte response
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["MSD"]] %>%
### filter to only V1 D1, D1_1hr, D2
dplyr::filter(vaccine == "DNA_LION" &
vax_dose == "V4" &
Day %in% c("D1","D2")
) %>%
### simplify sampleName
dplyr::mutate(sampleName = str_remove_all(sampleName,"DNA_LION__V4_"))
########## generate "dataset" format - wide, no metadata with columns = samples (including vax, timepoint, animal), rows = analytes
##### has analyte names as rownames AND as column labeled gene
df <- data_to_use %>%
### remove metadata
dplyr::select(-c(MacaqueID,vaccine,vax_dose,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "analyte", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL) %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "sampleName", values_from = "log2_value") %>%
dplyr::mutate(Gene=analyte) %>%
column_to_rownames("analyte") %>%
relocate(Gene)
cytokine_columns <- rownames(df)
## generate metadata only
targetfile_full <- data_to_use %>%
### remove data columns
dplyr::select(-all_of(cytokine_columns))
id_var_of_interest <- "sampleName"
var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
## designate covariates with Day related first, then id
covariates=c("Day","MacaqueID")
contrasts <- c("D2-D1")
contrast_name <- "Fig3C_MSD_V4_D2vsD1"
contrast <- "D2-D1$"
contrast_simp <- "DNA_LION_V4_D2vsD1"
df_original <- diff_counts
sample_of_interest <- grep(contrast, colnames(df_original), value=TRUE)
df1 <- df_original %>% select(Gene,one_of(sample_of_interest))
row.names(df1) <- df1$Gene
df1$Gene <- NULL
df <- as.data.frame(t(as.matrix(df1)))
# Remove
#remove bad columns
# filter <- apply(df, 2, function(z) all(is.na(z))) #remove if all NAs
# df <- df[,!filter]
filterNA <- apply(df, 2, function(z) any(is.na(z))) #remove if any NAs - better to remove one assay than half the samples
removedNA <- colnames(df)[which(filterNA)]
df <- df[,!filterNA]
removed0 <- ""
if(length(which(colSums(df)==0)) >0){
filter0 <- which(colSums(df)==0)
removed0 <- colnames(df)[filter0]
df <- df[,-filter0] #remove if all zeros
}
## remove columns with all identical values across the samples
nonIdentical_colnames <- df %>% dplyr::select(where(~n_distinct(.) > 1)) %>% colnames()
removedIdentical <- setdiff(colnames(df),nonIdentical_colnames)
df <- df %>% dplyr::select(where(~n_distinct(.) > 1))
M <- cor(df, method = "spearman")
p.mat <- cor.mtest(df)$p
## check if these are all not sig
p.mat.adj <- matrix(p.adjust(c(p.mat)), nrow=nrow(p.mat), byrow=FALSE)
colnames(p.mat.adj)=colnames(df)
rownames(p.mat.adj)=colnames(df)
col <- diverging_hcl(n=200, palette="Blue-Red")
cat(paste0("Number of samples in Corrplot: ",nrow(df)),"\n")
Number of samples in Corrplot: 8
cat("\n")
cat(paste0("Number of Analytes in Corrplot: ",ncol(df)),"\n")
Number of Analytes in Corrplot: 41
cat("\n")
cat(paste0("Analytes removed for NA: ", removedNA),"\n")
Analytes removed for NA: IL.8
cat("\n")
cat(paste0("Analytes removed for zeros: ", removed0),"\n")
Analytes removed for zeros:
cat("\n")
cat(paste0("Analytes removed for all identical values: ", removedIdentical),"\n")
Analytes removed for all identical values:
cat("\n")
p_val_levels <- c(0.05, 0.01)
p_val_names <- c("praw","padj")
# for(i in seq_along(p_val_levels)){
# for(j in seq_along(p_val_names)){
i <- 1
j <- 1
if(p_val_names[j] == "praw"){
p.mat_to_use <- p.mat
}else{
p.mat_to_use <- p.mat.adj
}
corrplot(M, method = "ellipse", col = col,
type = "upper", order = "hclust", number.cex = 0, #number.font=1,
addCoef.col = NULL, # Add coefficient of correlation
tl.col = "black", tl.srt = 90, tl.cex=1, cl.cex =1.2,# Text label color and rotation
bg = "white", #background color
title = paste0(p_val_names[j]," < ",p_val_levels[i]), mar=c(0,0,1,0),
# Combine with significance
p.mat = p.mat_to_use,
sig.level = p_val_levels[i], insig = "blank",
# hide correlation coefficient on the principal diagonal
diag = FALSE)
MSD for DNA/LION Vaccination V1, V2, V3, V4 x D1+4h, D2, D4, D8
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["MSD"]] %>%
### filter to only V1 D1, D1_1hr, D2
dplyr::filter(vaccine == "DNA_LION" &
vax_dose %in% c("V1","V2","V3","V4") &
Day %in% c("D1","D1_4h","D2","D4","D8")
)
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,vaccine,vax_dose,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_last(sampleName,"__"),
sampleName = NULL
)
# id_var_of_interest <- "sampleName"
# var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
# ## designate covariates with Day related first, then id
# covariates=c("Day","MacaqueID")
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("V1_D1_4h","V1_D2","V1_D4","V1_D8",
"V2_D1_4h","V2_D2","V2_D4","V2_D8",
"V3_D1_4h","V3_D2","V3_D4","V3_D8",
"V4_D2","V4_D4","V4_D8"),
"time2"=c("V1_D1","V1_D1","V1_D1","V1_D1",
"V2_D1","V2_D1","V2_D1","V2_D1",
"V3_D1","V3_D1","V3_D1","V3_D1",
"V4_D1","V4_D1","V4_D1")
)
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "FigS1A_MSD"
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
### read in degs results
degs_df <- list_of_stats[["FigS1A_MSD"]]
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(str_remove_all(str_remove_all(str_remove_all(
str_remove_all(colnames(.),"D1_log2FC"),
"-V1_"),"-V2_"),"-V3_"),"-V4_")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
#### add in gap for missing
degs_df_for_hm[,"V4_D1_4h"] <- NA
timepoint_factor_order <- c("D1_4h","D2","D4","D8")
############### annotation colors (set number of dirs)
annotation_colors <- list(Vaccine = c(V1="#038a7a",V2="#78038f",V3="goldenrod1",V4="#e615a0"),
Day = c(D1_4h="grey20",D2="grey40",D4="grey60",D8="grey80"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
dplyr::mutate(
Day=factor(str_after_first(contrast,"_"),timepoint_factor_order),
Vaccine = factor(str_before_first(contrast,"_"),c("V1","V2","V3","V4"))) %>%
dplyr::arrange(Vaccine,Day) %>%
column_to_rownames("contrast")
############## no row annotation
gaps_col_to_use <- c(4,8,12)
newOrder_row <- read.table(file.path(outputTABLES_dir,paste0(contrast_name,"_rowlist_ordered_hm_ForSpin.txt")), header=T)
newOrder_row_v <-as.vector(newOrder_row[,3])
p <- pheatmap(degs_df_for_hm[,rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
cluster_rows=dendextend::rotate(myMap$tree_row, newOrder_row_v),
gaps_col = gaps_col_to_use,
border_color = NA,
#cluster_rows = FALSE,
cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
#annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["MSD"]] %>%
### filter to only DNA_LION
### all timepoints for each dose
dplyr::filter(vaccine == "DNA_LION" &
vax_dose %in% c("V1","V2","V3","V4")
)
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,vaccine,vax_dose,Day)) %>%
### make longer
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### break out day & macaque ID
dplyr::mutate(vax_dose_day = factor(str_after_last(sampleName,"__"),
c("V1_D1","V1_D1_4h","V1_D2","V1_D4","V1_D8",
"V2_D1","V2_D1_4h","V2_D2","V2_D4","V2_D8",
"V3_D1","V3_D1_4h","V3_D2","V3_D4","V3_D8",
"V4_D1","V4_D2","V4_D3","V4_D4","V4_D8",
"V4_D15","V4_D22")),
### to add spaces
vax_dose_day2 = ifelse(grepl("V1",vax_dose_day),as.numeric(vax_dose_day),
ifelse(grepl("V2",vax_dose_day),as.numeric(vax_dose_day)+1,
ifelse(grepl("V3",vax_dose_day),as.numeric(vax_dose_day)+2,
as.numeric(vax_dose_day)+3))),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL,
### add cat for spacing between vax doses
)
selected_analytes <- c("IL.15","CXCL10.IP.10",
"IFN.g","CCL4.MIP.1b",
"IL.1Ra","IL.6",
"FLT3L","CXCL13",
"CXCL11.ITAC","CCL2.MCP.1")
### To name output files
contrast_name <- "FigS1B_MSD"
num_columns_for_plot <- 2
list_of_plots_mean <- list()
list_of_plots_median <- list()
## for each analyte of interest
for(i in selected_analytes){
phen_to_plot <- df_long %>%
dplyr::filter(variable == i)
#### base plot
p <- ggplot(phen_to_plot, aes(x = vax_dose_day2, y = raw_value, group = MacaqueID)) +
geom_line(color = "grey60")+
## adds spaces in x axis
scale_x_continuous(breaks = unique(df_long$vax_dose_day2),
#labels = str_replace(levels(phen_to_plot$vax_dose_day),"_","\n")
labels = str_replace(levels(phen_to_plot$vax_dose_day),"_",", "))+
labs(title = i)+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
p_mean <- p +
stat_summary(aes(group = 1, y = raw_value), fun=mean, geom="line", colour="red")
p_median <- p +
stat_summary(aes(group = 1, y = raw_value), fun=median, geom="line", colour="red")
# plot(p)
list_of_plots_mean[[i]] <- p_mean
list_of_plots_median[[i]] <- p_median
cat("\n")
cat(' \n \n') ### this is the key!
}
cat("#### ",paste0("Mean"),"\n")
cat('\n')
do.call("grid.arrange",c(list_of_plots_mean, ncol=num_columns_for_plot))
cat("\n")
cat(' \n \n') ### this is the key!
cat("#### ",paste0("Median"),"\n")
cat('\n')
do.call("grid.arrange",c(list_of_plots_median, ncol=num_columns_for_plot))
cat("\n")
cat(' \n \n') ### this is the key!
MSD for DNA/LION Vaccination V4 x D2, D4, D8, D15 & D22 vs D1
### stats are the same as for Fig3B_MSD_V4
list_of_stats[["FigS1C_MSD_V4"]] <- list_of_stats[["Fig3B_MSD_V4"]]
degs_df <- list_of_stats[["FigS1C_MSD_V4"]]
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(str_remove_all(colnames(.),"-V4_D1_log2FC"),"V4_")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("V4_D2","V4_D3","V4_D4","V4_D8","V4_D15","V4_D22"),
"time2"=c("V4_D1","V4_D1","V4_D1","V4_D1","V4_D1","V4_D1")
)
timepoint_factor_order <- c("D2","D3","D4","D8","D15","D22")
############## use row annotation with sig categories as alluvial df
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
annotation_row1 <- data.frame("rowname"=degs_df$rowname)
for(curr_contrast in rev(curr_contrasts)){
### trim to curr contrast
curr_pval_df <- degs_df[,c("rowname",colnames(degs_df)[grepl(curr_contrast,colnames(degs_df))])]
### rename column names to be generic
orig_names <- colnames(curr_pval_df)
simp_names <- ifelse(grepl("_",orig_names),str_after_last(orig_names,"_"),orig_names)
colnames(curr_pval_df) <- simp_names
curr_pval_df <- curr_pval_df %>%
dplyr::mutate(dir = ifelse(pval < as.double(params$pCutoff),
ifelse(log2FC > as.double(params$FCcutoff),"up", ifelse(log2FC > 0, "up_p_only",
ifelse(log2FC < (-1)*as.double(params$FCcutoff),"down",ifelse(log2FC < 0, "down_p_only","p_only")))),"not_sig"))
curr_df <- curr_pval_df[,c("rowname","dir")] %>%
dplyr::mutate(dir = factor(dir, c("up","up_p_only","not_sig","down_p_only","down")))
colnames(curr_df) <- c("rowname",paste0(str_remove_all(str_replace_all(curr_contrast,"-","vs"),"V4_"),"_dir"))
annotation_row1 <- dplyr::left_join(annotation_row1,curr_df, by = "rowname")
}
#### make rownames
annotation_row <- column_to_rownames(annotation_row1)
######### make long dataframe to plot
combo_df_long <- annotation_row %>%
### remove rows with all NA
filter(if_any(everything(), ~ !is.na(.))) %>%
rownames_to_column("Gene") %>%
tidyr::pivot_longer(!Gene, names_to = "timepoint", values_to = "dir") %>%
dplyr::mutate(timepoint = factor(str_remove_all(timepoint,"_dir"),c("D2vsD1","D3vsD1","D4vsD1","D8vsD1","D15vsD1","D22vsD1")),
Gene = factor(Gene))
###### set colors
color_df <- data.frame("group"=unique(levels(combo_df_long$dir)),
"color"=c("red","lightpink","grey85","skyblue","blue"))
curr_color_df <- color_df[which(color_df$group %in% unique(combo_df_long$dir)),]
####### plot
gg <- ggplot(combo_df_long,
aes(x = timepoint, stratum = dir, alluvium = Gene))+
stat_alluvium(aes(fill = dir), width = .65, alpha = 1/2,) +
stat_stratum(aes(stratum = dir, fill = dir), width = .65) +
stat_alluvium(geom = "text", aes(label = Gene), size = 6.3) +
scale_fill_manual(values = curr_color_df$color)+
theme_bw()+
theme(axis.text=element_text(size=20),legend.text=element_text(size=20),
axis.title=element_text(size=20,face="bold"))
#ggtitle(paste0(tissue_to_use))
plot(gg)
cat("\n")
cat(' \n \n') ### this is the key!
MSD for DNA/LION Vaccination & DNA Vaccination V1, V2 x D2, D4, D8
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["MSD"]] %>%
### filter to only V1 D1, D1_1hr, D2
dplyr::filter(vaccine %in% c("DNA_LION","DNA") &
vax_dose %in% c("V1","V2") &
Day %in% c("D1","D2","D4","D8")
)
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,vaccine,vax_dose,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
# id_var_of_interest <- "sampleName"
# var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
# ## designate covariates with Day related first, then id
# covariates=c("Day","MacaqueID")
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("DNA_LION__V1_D2","DNA_LION__V1_D4","DNA_LION__V1_D8",
"DNA_LION__V2_D2","DNA_LION__V2_D4","DNA_LION__V2_D8",
"DNA__V1_D2","DNA__V1_D4","DNA__V1_D8",
"DNA__V2_D2","DNA__V2_D4","DNA__V2_D8"),
"time2"=c("DNA_LION__V1_D1","DNA_LION__V1_D1","DNA_LION__V1_D1",
"DNA_LION__V2_D1","DNA_LION__V2_D1","DNA_LION__V2_D1",
"DNA__V1_D1","DNA__V1_D1","DNA__V1_D1",
"DNA__V2_D1","DNA__V2_D1","DNA__V2_D1")
)
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "FigS2B_MSD"
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
### read in degs results
degs_df <- list_of_stats[["FigS2B_MSD"]]
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(
str_remove_all(
str_remove_all(
str_remove_all(
str_remove_all(colnames(.),"V1_D1_log2FC"),
"V2_D1_log2FC"),"-DNA_LION__"),"-DNA__")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
timepoint_factor_order <- c("D2","D4","D8")
############### annotation colors (set number of dirs)
annotation_colors <- list(Vaccine = c(DNA_LION="darkgoldenrod",DNA="darkolivegreen"),
Dose = c(V1="#038a7a",V2="#78038f"),
Day = c(D2="grey40",D4="grey60",D8="grey80"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
dplyr::mutate(
Vaccine = factor(str_before_first(contrast,"__"),c("DNA_LION","DNA")),
Dose = factor(str_before_first(str_after_first(contrast,"__"),"_"),c("V1","V2")),
Day=factor(str_after_last(contrast,"_"),timepoint_factor_order)) %>%
dplyr::arrange(Vaccine,Dose,Day) %>%
column_to_rownames("contrast")
############## no row annotation
gaps_col_to_use <- c(3,6,9)
newOrder_row <- read.table(file.path(outputTABLES_dir,paste0(contrast_name,"_rowlist_ordered_hm_ForSpin.txt")), header=T)
newOrder_row_v <-as.vector(newOrder_row[,3])
p <- pheatmap(degs_df_for_hm[,rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
cluster_rows=dendextend::rotate(myMap$tree_row, newOrder_row_v),
gaps_col = gaps_col_to_use,
border_color = NA,
#cluster_rows = FALSE,
cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
#annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 0
##### select data
data_to_use <- list_of_datasets[["MSD"]] %>%
### filter to only DNA_LION
### only V1, V2; D1,D2,D4,D8
dplyr::filter(vaccine %in% c("DNA_LION","DNA") &
vax_dose %in% c("V1","V2") &
Day %in% c("D1","D2","D4","D8")
)
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,vaccine,vax_dose,Day)) %>%
### make longer
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### break out day & macaque ID
dplyr::mutate(vax_dose_day = factor(str_replace_all(str_after_first(sampleName,"__"),"DNA_LION","DNALION"),
c("DNALION__V1_D1","DNALION__V1_D2","DNALION__V1_D4","DNALION__V1_D8",
"DNALION__V2_D1","DNALION__V2_D2","DNALION__V2_D4","DNALION__V2_D8",
"DNA__V1_D1","DNA__V1_D2","DNA__V1_D4","DNA__V1_D8",
"DNA__V2_D1","DNA__V2_D2","DNA__V2_D4","DNA__V2_D8")),
### to add spaces
vax_dose_day2 = ifelse(grepl("DNALION__V1",vax_dose_day),as.numeric(vax_dose_day),
ifelse(grepl("DNALION__V2",vax_dose_day),as.numeric(vax_dose_day)+1,
ifelse(grepl("DNA__V1",vax_dose_day),as.numeric(vax_dose_day)+2,
as.numeric(vax_dose_day)+3))),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL,
### add cat for spacing between vax doses
)
selected_analytes <- c("CX3CL1.Fractalkine","CXCL11.ITAC","IL.15","IL.7",
"IL.1Ra","CXCL10.IP.10","CXCL13","CCL2.MCP.1",
"FLT3L","IL.18")
### To name output files
contrast_name <- "FigS2C_MSD"
num_columns_for_plot <- 4
list_of_plots <- list()
## for each analyte of interest
for(i in selected_analytes){
phen_to_plot <- df_long %>%
dplyr::filter(variable == i)
#### base plot
p <- ggplot(phen_to_plot, aes(x = vax_dose_day2, y = raw_value, group = MacaqueID)) +
geom_line(color = "grey20")+
geom_point()+
## adds spaces in x axis
scale_x_continuous(breaks = unique(df_long$vax_dose_day2),
#labels = str_replace(levels(phen_to_plot$vax_dose_day),"_","\n")
labels = str_replace(levels(phen_to_plot$vax_dose_day),"__",", "))+
labs(title = i)+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
cat("\n")
cat(' \n \n') ### this is the key!
Broad cell populations vs Time
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig4ABC"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("D3","D8","D22",
"D8","D22","D22"),
"time2"=c("D1","D1","D1",
"D3","D3","D8")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
timepoints_x_order <- c("D1","D3","D8","D22")
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig4C"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 3
### order for scatterplots
variables_ordered <- c("HLADRpos_Myeloid","B","T")
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),variables_ordered),
Day = factor(str_after_first(sampleName,"__"),timepoints_x_order),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_beeswarm(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
B:T cell ratio in Blood vs LN
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig4D"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
### generate dataframe: rows = samples, columns = variables (including timepoint) and group for comparison (= compartment)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_before_first(variable,"__"), ### call timepoint the variable that you want to compare
variable = paste0(str_after_first(variable,"__"),"__",str_after_first(sampleName,"__")), ### add timepoint to variable
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("LN"),
"time2"=c("blood")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig4D"
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### break out compartment and variable
dplyr::mutate(compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = str_after_first(variable,"__"),
Day = factor(str_after_first(sampleName,"__"),c("D1","D3","D8","D22")),
MacaqueID = str_before_first(sampleName,"__"),
var_tp = paste0(variable, "__", Day),
sampleName = NULL) %>%
dplyr::arrange(Day, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
plot_sig_only <- "no"
phen_to_plot <- df_long
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.08*(num_contrasts+1)),
max_y*(1+0.08*(num_contrasts+1))
)
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
scale_fill_manual(values=c("white","black")) +
geom_beeswarm(aes(fill=compartment), color = "black", size = 2, shape=21, alpha=0.8, cex = 3, dodge.width = 0.7)+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:length(list_of_pvals)){
currpvals <- list_of_pvals[[l]] %>%
dplyr::mutate(x = as.numeric(l),
xmin = x-0.2,
xmax = x+0.2)
p <- p +
stat_pvalue_manual(currpvals[1,], label = "p",size = 2.5,
y.position = max_y+0.1*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
plot(p)
cat("\n")
cat(' \n \n') ### this is the key!
B-cell folicle IHC
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig4E"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__"))))
##### controls are 3 different animals for GCtoNonGC only, not used for stats
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("D3","D8","D22",
"D8","D22","D22"),
"time2"=c("D1","D1","D1",
"D3","D3","D8")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
timepoints_x_order <- c("D1","D3","D8","D22")
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig4E"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 3
### variables ordered in plots
variables_ordered <- c("LN__BCF_GC","LN__BCF_noGC","LN__GCtoNonGC_ratio")
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),variables_ordered),
Day = factor(str_after_first(sampleName,"__"),timepoints_x_order),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_beeswarm(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
CD4 and CD8 T cells
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "FigS3"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("D3","D8","D22",
"D8","D22","D22"),
"time2"=c("D1","D1","D1",
"D3","D3","D8")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
timepoints_x_order <- c("D1","D3","D8","D22")
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "FigS3"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 2
### variables ordered in plots
variables_ordered <- c("CD4","CD8")
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),variables_ordered),
Day = factor(str_after_first(sampleName,"__"),timepoints_x_order),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_beeswarm(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
Myeloid populations
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig5"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("D3","D8","D22",
"D8","D22","D22"),
"time2"=c("D1","D1","D1",
"D3","D3","D8")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig5ABC"
#contrast_name <- "Fig5D_Myeloid_vsD1"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 3
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
varcomp_to_plot <- c("blood__Pct_of_HLADRpos_HLA_DRpos_Ki67pos","LN__Pct_of_HLADRpos_HLA_DRpos_Ki67pos")
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### trim to only the vars in this panel
dplyr::filter(variable %in% varcomp_to_plot) %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),varcomp_to_plot),
Day = factor(str_after_first(sampleName,"__"),c("D1","D3","D8","D22")),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_point(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
geom_line(aes(group = MacaqueID), color = "grey30")+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
varcomp_to_plot <- c("blood__Pct_of_HLADRpos_HLA_DRpos_Ki67pos","LN__Pct_of_HLADRpos_HLA_DRpos_Ki67pos",
"blood__Pct_of_HLADRpos_pDC_Ki67pos","LN__Pct_of_HLADRpos_pDC_Ki67pos",
"blood__Pct_of_HLADRpos_cDC_Ki67pos","LN__Pct_of_HLADRpos_cDC_Ki67pos",
"blood__Pct_of_HLADRpos_Classical_Mo_Ki67pos","LN__Pct_of_HLADRpos_Classical_Mo_Ki67pos")
### read in degs results
degs_df <- list_of_stats[[paste0(contrast_name)]] %>%
#### trim to only the vars in the figure panel
dplyr::filter(rowname %in% varcomp_to_plot)
#### for hm only want the vs D1
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("D1_log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(colnames(.),"-D1_log2FC")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
timepoint_factor_order <- c("D3","D8","D22")
############### annotation colors (set number of dirs)
annotation_colors <- list(Day = c(D3="grey40",D8="grey80",D22="grey90"),
dir1 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir2 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir3 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir4 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir5 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir6 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue")
)
names(annotation_colors[["Day"]]) <- timepoint_factor_order
names(annotation_colors) <- c("Day",paste0(str_replace_all(TimePoints_to_compare$name,"-","vs"),"_dir"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
mutate(Day=factor(contrast, timepoint_factor_order)) %>%
arrange(Day) %>%
column_to_rownames("contrast")
############## add row annotation with sig change for FC
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
annotation_row1 <- data.frame("rowname"=degs_df$rowname)
for(curr_contrast in rev(curr_contrasts)){
### trim to curr contrast
curr_pval_df <- degs_df[,c("rowname",colnames(degs_df)[grepl(curr_contrast,colnames(degs_df))])]
### rename column names to be generic
orig_names <- colnames(curr_pval_df)
simp_names <- ifelse(grepl("_",orig_names),str_after_last(orig_names,"_"),orig_names)
colnames(curr_pval_df) <- simp_names
curr_pval_df <- curr_pval_df %>%
dplyr::mutate(dir = ifelse(pval < as.double(params$pCutoff),
ifelse(log2FC > as.double(params$FCcutoff),"up", ifelse(log2FC > 0, "up_p_only",
ifelse(log2FC < (-1)*as.double(params$FCcutoff),"down",ifelse(log2FC < 0, "down_p_only","p_only")))),"not_sig"))
curr_df <- curr_pval_df[,c("rowname","dir")] %>%
dplyr::mutate(dir = factor(dir, c("up","up_p_only","not_sig","down_p_only","down")))
colnames(curr_df) <- c("rowname",paste0(str_remove_all(str_replace_all(curr_contrast,"-","vs"),"V4_"),"_dir"))
annotation_row1 <- dplyr::left_join(annotation_row1,curr_df, by = "rowname")
}
#### make rownames
annotation_row <- column_to_rownames(annotation_row1)
### sort to specified order
curr_ann_row <- annotation_row[varcomp_to_plot,]
mycols = diverging_hcl(n=100, palette="Blue-Red 3")
scale_range = 2
breakscale <- seq(-1*scale_range, scale_range, length=100)
p <- pheatmap(degs_df_for_hm[rownames(curr_ann_row),rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
#gaps_col = gaps_to_use,
border_color = NA,
cluster_rows = FALSE, cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
varcomp_to_plot <- c("blood__Pct_of_HLADRpos_Intermediate_Mo_Ki67pos","LN__Pct_of_HLADRpos_Intermediate_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_Non_classical_Mo_Ki67pos","LN__Pct_of_HLADRpos_Non_classical_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_DC3_Ki67pos","LN__Pct_of_HLADRpos_DC3_Ki67pos",
"blood__Pct_of_HLADRpos_cDC1_Ki67pos","LN__Pct_of_HLADRpos_cDC1_Ki67pos",
"blood__Pct_of_HLADRpos_cDC2_Ki67pos","LN__Pct_of_HLADRpos_cDC2_Ki67pos")
num_columns_for_plot <- 5
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### trim to only the vars in this panel
dplyr::filter(variable %in% varcomp_to_plot) %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),varcomp_to_plot),
Day = factor(str_after_first(sampleName,"__"),c("D1","D3","D8","D22")),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_point(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
geom_line(aes(group = MacaqueID), color = "grey30")+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
### read in degs results
degs_df <- list_of_stats[[paste0(contrast_name)]] %>%
#### trim to only the vars in the figure panel
dplyr::filter(rowname %in% varcomp_to_plot)
#### for hm only want the vs D1
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("D1_log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(colnames(.),"-D1_log2FC")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
timepoint_factor_order <- c("D3","D8","D22")
############### annotation colors (set number of dirs)
annotation_colors <- list(Day = c(D3="grey40",D8="grey80",D22="grey90"),
dir1 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir2 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir3 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir4 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir5 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir6 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue")
)
names(annotation_colors[["Day"]]) <- timepoint_factor_order
names(annotation_colors) <- c("Day",paste0(str_replace_all(TimePoints_to_compare$name,"-","vs"),"_dir"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
mutate(Day=factor(contrast, timepoint_factor_order)) %>%
arrange(Day) %>%
column_to_rownames("contrast")
############## add row annotation with sig change for FC
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
annotation_row1 <- data.frame("rowname"=degs_df$rowname)
for(curr_contrast in rev(curr_contrasts)){
### trim to curr contrast
curr_pval_df <- degs_df[,c("rowname",colnames(degs_df)[grepl(curr_contrast,colnames(degs_df))])]
### rename column names to be generic
orig_names <- colnames(curr_pval_df)
simp_names <- ifelse(grepl("_",orig_names),str_after_last(orig_names,"_"),orig_names)
colnames(curr_pval_df) <- simp_names
curr_pval_df <- curr_pval_df %>%
dplyr::mutate(dir = ifelse(pval < as.double(params$pCutoff),
ifelse(log2FC > as.double(params$FCcutoff),"up", ifelse(log2FC > 0, "up_p_only",
ifelse(log2FC < (-1)*as.double(params$FCcutoff),"down",ifelse(log2FC < 0, "down_p_only","p_only")))),"not_sig"))
curr_df <- curr_pval_df[,c("rowname","dir")] %>%
dplyr::mutate(dir = factor(dir, c("up","up_p_only","not_sig","down_p_only","down")))
colnames(curr_df) <- c("rowname",paste0(str_remove_all(str_replace_all(curr_contrast,"-","vs"),"V4_"),"_dir"))
annotation_row1 <- dplyr::left_join(annotation_row1,curr_df, by = "rowname")
}
#### make rownames
annotation_row <- column_to_rownames(annotation_row1)
### sort to specified order
curr_ann_row <- annotation_row[varcomp_to_plot,]
mycols = diverging_hcl(n=100, palette="Blue-Red 3")
scale_range = 2
breakscale <- seq(-1*scale_range, scale_range, length=100)
p <- pheatmap(degs_df_for_hm[rownames(curr_ann_row),rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
#gaps_col = gaps_to_use,
border_color = NA,
cluster_rows = FALSE, cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
Spearman correlation between MSD D2vsD1 and Myeloid populations
########## generate "dataset" format - wide, no metadata with columns = samples (including vax, timepoint, animal), rows = analytes
##### has analyte names as rownames AND as column labeled gene
df <- data_to_use %>%
### remove metadata
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "analyte", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL) %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "sampleName", values_from = "log2_value") %>%
dplyr::mutate(Gene=analyte) %>%
column_to_rownames("analyte") %>%
relocate(Gene)
analyte_columns <- rownames(df)
## generate metadata only
targetfile_full <- data_to_use %>%
### remove data columns
dplyr::select(-all_of(analyte_columns))
id_var_of_interest <- "sampleName"
var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
## designate covariates with Day related first, then id
covariates=c("Day","MacaqueID")
contrasts <- c("D3-D1","D8-D1","D22-D1")
contrast_name <- "Fig5DE_Myeloid"
raw_or_adj <- "raw"
### which dataset levels/FC, trimmed to the tab
curr_ds <- list_of_diff_counts[[paste0(contrast_name)]] %>%
column_to_rownames("Gene")
###### MSD = D2-D1 only log2FC
curr_df2 <- data.frame(t(list_of_diff_counts[[paste0("Fig3C_MSD_V4_D2vsD1")]])) %>%
### make row 1 colnames
janitor::row_to_names(row_number = 1) %>%
### trim to only D2-D1
rownames_to_column("sampleName") %>%
dplyr::filter(grepl("D2-D1",sampleName)) %>%
### be sure sorted
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
set_names_to_use <- c(paste0(c("D3-D1","D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC"))
corr_tag_name <- "CorrVsMSD"
######## calculate correlation
cor_method <- "spearman"
CorrName <- paste0(contrast_name,"__",corr_tag_name)
#### generate empty dataframe for combo
fullcombo_df_long <- data.frame()
for(set_name in set_names_to_use){
curr_df1 <- data.frame(t(curr_ds)) %>%
### trim to only tp of interest
rownames_to_column("sampleName") %>%
dplyr::filter(grepl(str_before_first(set_name,"_"),sampleName)) %>%
### be sure sorted
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
## remove rows with all NAs
filter(if_any(everything(), ~ !is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
## generate cor matrix
cor_mat <- cor(as.matrix(signif(curr_df1,7)), as.matrix(signif(curr_df2,7)), method = cor_method, use="pa")
## raw p
cor_test_mat <- data.frame(matrix(NA, nrow=dim(curr_df1)[2], ncol=dim(curr_df2)[2]))
rownames(cor_test_mat) <- colnames(curr_df1)
colnames(cor_test_mat) <- colnames(curr_df2)
for(curr1 in colnames(curr_df1)) {
for(curr2 in colnames(curr_df2)) {
cor_test_mat[curr1, curr2] <-
cor.test(as.matrix(signif(curr_df1,7)[,curr1]),
as.matrix(signif(curr_df2,7)[,curr2]),
method = cor_method, use="pa")$p.value
}
}
## adj p for multiple comparisons by BH over cytokine assays
cor_test_adj_mat <- cor_test_mat #duplicate dataframe to replace values
for(curr_assay in colnames(curr_df1)) {
cor_test_adj_mat[curr_assay,] <- p.adjust(cor_test_mat[curr_assay,], method = "BH")
}
### write function for converting the cor_mat to long format df
corr_to_long <- function(mat){
data.frame(mat) %>%
rownames_to_column("celltype") %>%
tidyr::pivot_longer(!celltype, names_to = "assay", values_to = "value") %>%
dplyr::mutate(celltype_assay = paste0(celltype,"_vs_",assay))
}
#### convert to long
cor_long <- corr_to_long(cor_mat)
if(raw_or_adj == "raw"){
p_long <- corr_to_long(cor_test_mat)
}else{
p_long <- corr_to_long(cor_test_adj_mat)
}
#### combine cor and p long dfs
curr_df_long <- dplyr::left_join(cor_long,p_long, by = c("celltype","assay","celltype_assay")) %>%
### rename values to be useful
dplyr::rename(corr_coef=value.x,p=value.y) %>%
### add column with set_name
dplyr::mutate(timepoint = paste0(str_replace_all(set_name,"-","vs")))
### add to fullcombo
fullcombo_df_long <- rbind(fullcombo_df_long,curr_df_long)
}
### add to stats master list
list_of_stats[[paste0(CorrName)]] <- fullcombo_df_long %>%
dplyr::relocate(celltype, assay, corr_coef, p, timepoint)
barbara_order <- c("blood__Pct_of_HLADRpos_HLA_DRpos_Ki67pos",
"blood__Pct_of_HLADRpos_Classical_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_Intermediate_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_Non_classical_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_DC3_Ki67pos"
)
## only for the Intervals
timepoints_to_use <- paste0(c("D3-D1","D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC")
TP_name <- "Intervals"
corr_tag_name <- "CorrVsMSD"
raw_or_adj <- "raw"
cor_method <- "spearman"
trim_pval_level <- 0.05
all_or_selected <- "selected"
assay_arrangement <- c("CCL19.MIP.3b","CCL24.Eotaxin.2","CCL26.Eotaxin.3","CCL27.CTACK","CXCL13",
"FLT3L","CCL13.MCP.4","CXCL12.SDF.1a","CCL11.Eotaxin","CCL22.MDC",
#"IL.17B",
"IL.9") ### if trimming/ordering assays
### set pos/neg/both
directions_to_use <- c("pos")
assays_to_exclude <- c("")
raw p < 0.05
#### make df for plotting
df_long1 <- fullcombo_df_long %>%
### generate dir column
dplyr::mutate(dir = ifelse(p < trim_pval_level,
ifelse(corr_coef > 0, "pos","neg"),
"notSig"),
### break out compartment
compartment = str_before_first(celltype,"__"),
celltype = str_after_first(celltype,"__"),
celltype_assay = str_after_first(celltype_assay,"__")) %>%
### remove the corr and p vals for pivoting
dplyr::select(-c(corr_coef,p))
###### set up inital dataframe
## trim to only sig
bub_df2 <- df_long1[which(df_long1$dir %in% directions_to_use ),] %>%
### also remove any analytes_to_exclude
dplyr::filter(!assay %in% assays_to_exclude) %>%
dplyr::mutate(timepoint = factor(str_before_first(timepoint,"_"),str_replace_all(str_before_first(timepoints_to_use,"_"),"-","vs")),
dir = factor(dir, directions_to_use),
##### add compartment to celltype
comp_celltype = paste0(compartment,"__",celltype))
#### remove timepoint suffix from celltype (only needed for vs Abs/NAbs)
cleaned_celltypes <- bub_df2$comp_celltype
for(tu in timepoints_to_use){
cleaned_celltypes <- str_remove_all(cleaned_celltypes,paste0("__",str_replace_all(tu,"-","vs")))
}
bub_df2$celltype <- cleaned_celltypes
#### if selected tab combos, keep cell pops on the x-axis
bub_df_bothordered <- bub_df2 %>%
#### trim to desired assays
dplyr::filter(assay %in% assay_arrangement) %>%
### remove factor levels for assay
dplyr::mutate(assay = factor(as.character(assay),assay_arrangement),
### also breakout for celltype
celltype_only = str_after_first(celltype,"__"),
compartment = factor(compartment, c("blood","LN"))) %>%
dplyr::arrange(assay) %>%
#### trim to barbara_order celltypes
dplyr::filter(celltype %in% barbara_order) %>%
## lock in factor levels for assay (rev for y axis since want it to be reversed)
dplyr::mutate(assay = factor(assay, rev(unique(assay))),
### now get celltype order
celltype = factor(celltype, intersect(barbara_order,unique(celltype)))) %>%
#### now arrange by timepoint to be able to see bulls eyes
dplyr::arrange(desc(timepoint))
###### reversed axes!!!
bba <-
ggplot(bub_df_bothordered, aes(x = celltype, y = assay))+
geom_point(aes(size = timepoint, color = timepoint))+
#scale_alpha_manual(values = alpha_values)+
scale_size_manual(values = c(3,6,10))+
scale_color_manual(values = c("#262626","#7F7F7F","#BFBFBF"))+
theme_bw()+
### make legend symbol bigger & fix color
# guides(color = guide_legend(override.aes = list(size = 5)),
# size = guide_legend(override.aes = list(color = leg_color)))+
## remove gridlines
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
## rotate axis labels
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot(bba)
cat("\n")
cat(' \n \n') ### this is the key!
### last plot in loop doesn't appear unless add this
cat("\n")
cat(' \n \n') ### this is the key!
###### need to fix why this plots in Rstudio but not renedered
barbara_order <- c("blood__Pct_of_HLADRpos_HLA_DRpos_Ki67pos",
"blood__Pct_of_HLADRpos_Classical_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_Intermediate_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_Non_classical_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_DC3_Ki67pos"
)
## only for the Intervals
timepoints_to_use <- paste0(c("D3-D1","D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC")
TP_name <- "Intervals"
corr_tag_name <- "CorrVsMSD"
raw_or_adj <- "raw"
cor_method <- "spearman"
trim_pval_level <- 0.05
all_or_selected <- "selected"
assay_arrangement <- c("CCL19.MIP.3b","CCL24.Eotaxin.2","CCL26.Eotaxin.3","CCL27.CTACK","CXCL13",
"FLT3L","CCL13.MCP.4","CXCL12.SDF.1a","CCL11.Eotaxin","CCL22.MDC",
#"IL.17B",
"IL.9") ### if trimming/ordering assays
### set pos/neg/both
directions_to_use <- c("pos")
assays_to_exclude <- c("")
raw p < 0.05
#### make df for plotting
df_long1 <- fullcombo_df_long %>%
### generate dir column
dplyr::mutate(dir = ifelse(p < trim_pval_level,
ifelse(corr_coef > 0, "pos","neg"),
"notSig"),
### break out compartment
compartment = str_before_first(celltype,"__"),
celltype = str_after_first(celltype,"__"),
celltype_assay = str_after_first(celltype_assay,"__")) %>%
### remove the corr and p vals for pivoting
dplyr::select(-c(corr_coef,p))
###### set up inital dataframe
## trim to only sig
bub_df2 <- df_long1[which(df_long1$dir %in% directions_to_use ),] %>%
### also remove any analytes_to_exclude
dplyr::filter(!assay %in% assays_to_exclude) %>%
dplyr::mutate(timepoint = factor(str_before_first(timepoint,"_"),str_replace_all(str_before_first(timepoints_to_use,"_"),"-","vs")),
dir = factor(dir, directions_to_use),
##### add compartment to celltype
comp_celltype = paste0(compartment,"__",celltype))
#### remove timepoint suffix from celltype (only needed for vs Abs/NAbs)
cleaned_celltypes <- bub_df2$comp_celltype
for(tu in timepoints_to_use){
cleaned_celltypes <- str_remove_all(cleaned_celltypes,paste0("__",str_replace_all(tu,"-","vs")))
}
bub_df2$celltype <- cleaned_celltypes
#### if selected tab combos, keep cell pops on the x-axis
bub_df_bothordered <- bub_df2 %>%
#### trim to desired assays
dplyr::filter(assay %in% assay_arrangement) %>%
### remove factor levels for assay
dplyr::mutate(assay = factor(as.character(assay),assay_arrangement),
### also breakout for celltype
celltype_only = str_after_first(celltype,"__"),
compartment = factor(compartment, c("blood","LN"))) %>%
dplyr::arrange(assay) %>%
#### trim to barbara_order celltypes
dplyr::filter(celltype %in% barbara_order) %>%
## lock in factor levels for assay (rev for y axis since want it to be reversed)
dplyr::mutate(assay = factor(assay, rev(unique(assay))),
### now get celltype order
celltype = factor(celltype, intersect(barbara_order,unique(celltype)))) %>%
#### now arrange by timepoint to be able to see bulls eyes
dplyr::arrange(desc(timepoint))
###### reversed axes!!!
bba <-
ggplot(bub_df_bothordered, aes(x = celltype, y = assay))+
geom_point(aes(size = timepoint, color = timepoint))+
#scale_alpha_manual(values = alpha_values)+
scale_size_manual(values = c(3,6,10))+
scale_color_manual(values = c("#262626","#7F7F7F","#BFBFBF"))+
theme_bw()+
### make legend symbol bigger & fix color
# guides(color = guide_legend(override.aes = list(size = 5)),
# size = guide_legend(override.aes = list(color = leg_color)))+
## remove gridlines
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
## rotate axis labels
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot(bba)
cat("\n")
cat(' \n \n') ### this is the key!
### last plot in loop doesn't appear unless add this
cat("\n")
cat(' \n \n') ### this is the key!
###### need to fix why this plots in Rstudio but not renedered
Selected scatterplots
selected_pairs_df <- data.frame("var_x"=c("blood__Pct_of_HLADRpos_Classical_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_Non_classical_Mo_Ki67pos",
"blood__Pct_of_HLADRpos_HLA_DRpos_Ki67pos",
"blood__Pct_of_HLADRpos_Non_classical_Mo_Ki67pos",
"LN__Pct_of_HLADRpos_Classical_Mo_Ki67pos",
"LN__Pct_of_HLADRpos_Intermediate_Mo_Ki67pos",
"LN__Pct_of_HLADRpos_DC3_Ki67pos",
"LN__Pct_of_HLADRpos_Intermediate_Mo_Ki67pos"),
"Day_x"=c("D3-D1",
"D8-D1",
"D8-D1",
"D8-D1",
"D8-D1",
"D3-D1",
"D8-D1",
"D3-D1"
),
"var_y"=c("IL.9",
"FLT3L",
"CCL27.CTACK",
"CXCL12.SDF.1a",
"G.CSF",
"CXCL12.SDF.1a",
"FLT3L",
"CCL13.MCP.4"),
"Day_y"=c(rep("D2-D1",8))
)
num_columns_for_plot <- 2
### pivot curr_ds to separate timepoint from animal
df_long <- curr_ds %>%
rownames_to_column("comp_var") %>%
tidyr::pivot_longer(!comp_var, names_to = "sampleName", values_to = "log2FC") %>%
### break out MacaqueID and Day
dplyr::mutate(Day = str_after_first(sampleName,"__"),
Day = str_replace_all(Day,"-","vs"),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
### add in the second df
dplyr::full_join(data.frame(t(curr_df2)) %>%
rownames_to_column("comp_var") %>%
tidyr::pivot_longer(!comp_var,
names_to = "sampleName", values_to = "log2FC") %>%
### break out MacaqueID and Day
dplyr::mutate(Day = str_after_first(sampleName,"__"),
Day = str_replace_all(Day,"\\.","vs"),
Day = str_replace_all(Day,"-","vs"),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL),
by = c("comp_var","Day","MacaqueID","log2FC")
)
list_of_plots <- list()
## for each selected pair
for(i in 1:nrow(selected_pairs_df)){
curr_x <- selected_pairs_df$var_x[i]
curr_Day_x <- str_replace_all(selected_pairs_df$Day_x[i],"-","vs")
curr_y <- selected_pairs_df$var_y[i]
curr_Day_y <- str_replace_all(selected_pairs_df$Day_y[i],"-","vs")
phen_to_plot <- df_long %>%
dplyr::filter(comp_var %in% c(curr_x,curr_y)) %>%
dplyr::filter(Day %in% c(curr_Day_x,curr_Day_y)) %>%
### combine var and time
dplyr::mutate(comp_var_day = paste0(comp_var,"___",Day),
comp_var = NULL, Day = NULL) %>%
tidyr::pivot_wider(names_from = comp_var_day, values_from = log2FC)
#### base plot
p <- ggplot(phen_to_plot, aes(x = !!rlang::sym(paste0(curr_x,"___",curr_Day_x)),
y = !!rlang::sym(paste0(curr_y,"___",curr_Day_y)))) +
geom_point()+
stat_cor(method = "spearman")+
xlab(paste0(curr_x,"\n",curr_Day_x))+
ylab(paste0(curr_y,"\n",curr_Day_y))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title = element_text(size = 10)) ### make smaller to fit
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
varcomp_to_plot <- c("blood__Pct_of_HLADRpos_pDC_Ki67pos","LN__Pct_of_HLADRpos_pDC_Ki67pos",
"blood__Pct_of_HLADRpos_cDC_Ki67pos","LN__Pct_of_HLADRpos_cDC_Ki67pos",
"blood__Pct_of_HLADRpos_Classical_Mo_Ki67pos","LN__Pct_of_HLADRpos_Classical_Mo_Ki67pos")
#### need to reload Fig5 prep
list_of_stats[["FigS4A"]] <- data.frame("FigS4A"="stats are in the Fig5ABC tab")
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig5"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("D3","D8","D22",
"D8","D22","D22"),
"time2"=c("D1","D1","D1",
"D3","D3","D8")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig5ABC"
#contrast_name <- "Fig5D_Myeloid_vsD1"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 3
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### trim to only the vars in this panel
dplyr::filter(variable %in% varcomp_to_plot) %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),varcomp_to_plot),
Day = factor(str_after_first(sampleName,"__"),c("D1","D3","D8","D22")),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_point(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
geom_line(aes(group = MacaqueID), color = "grey30")+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
########## uses data from Figure 5
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig5"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
curr_phen_long <- data_to_use %>%
# calculate additional populations for generating proportions of 100%
dplyr::mutate(LN__Pct_of_HLADRpos_cDC_wo_cDC1orcDC2_Ki67pos = LN__Pct_of_HLADRpos_cDC_Ki67pos -
(LN__Pct_of_HLADRpos_cDC1_Ki67pos + LN__Pct_of_HLADRpos_cDC2_Ki67pos),
LN__Pct_of_HLADRpos_cDC_Ki67pos = NULL,
LN__Pct_of_HLADRpos_Classical_Mo_minusDC3_Ki67pos = LN__Pct_of_HLADRpos_Classical_Mo_Ki67pos - LN__Pct_of_HLADRpos_DC3_Ki67pos,
LN__Pct_of_HLADRpos_Classical_Mo_Ki67pos = NULL,
LN__Pct_of_HLADRpos_HLA_DRpos_Ki67pos = NULL,
blood__Pct_of_HLADRpos_cDC_wo_cDC1orcDC2_Ki67pos = blood__Pct_of_HLADRpos_cDC_Ki67pos -
(blood__Pct_of_HLADRpos_cDC1_Ki67pos + blood__Pct_of_HLADRpos_cDC2_Ki67pos),
blood__Pct_of_HLADRpos_cDC_Ki67pos = NULL,
blood__Pct_of_HLADRpos_Classical_Mo_minusDC3_Ki67pos = blood__Pct_of_HLADRpos_Classical_Mo_Ki67pos - blood__Pct_of_HLADRpos_DC3_Ki67pos,
blood__Pct_of_HLADRpos_Classical_Mo_Ki67pos = NULL,
blood__Pct_of_HLADRpos_HLA_DRpos_Ki67pos = NULL) %>%
### get average for each day/compartment
# first make longer
dplyr::select(-sampleName) %>%
tidyr::pivot_longer(cols = contains("__"), names_to = "comp_var", values_to = "value") %>%
dplyr::mutate(compartment = factor(str_before_first(comp_var,"__"), c("blood","LN")),
variable = str_after_last(comp_var,"__"),
Day = factor(Day, c("D1","D3","D8","D22"))) %>%
### collect proportion of 100% and means
# collect total
group_by(compartment, Day, MacaqueID) %>%
dplyr::mutate(total_per_comp_time = sum(value)) %>%
ungroup() %>%
# calculate prop within each animal
group_by(compartment, variable, Day, MacaqueID) %>%
dplyr::mutate(prop = value / total_per_comp_time *100) %>%
ungroup() %>%
# take mean across animals
group_by(compartment, variable, Day) %>%
dplyr::summarise(mean = mean(prop)) %>%
# add comp_time
dplyr::mutate(comp_time = paste0(compartment,"__",Day),
variable = factor(variable, c("Pct_of_HLADRpos_pDC_Ki67pos",
"Pct_of_HLADRpos_cDC1_Ki67pos",
"Pct_of_HLADRpos_cDC2_Ki67pos",
"Pct_of_HLADRpos_cDC_wo_cDC1orcDC2_Ki67pos",
"Pct_of_HLADRpos_Non_classical_Mo_Ki67pos",
"Pct_of_HLADRpos_Intermediate_Mo_Ki67pos",
"Pct_of_HLADRpos_Classical_Mo_minusDC3_Ki67pos",
"Pct_of_HLADRpos_DC3_Ki67pos"))) %>%
# sort
ungroup() %>%
dplyr::arrange(compartment,Day,variable)
### To name output files
contrast_name <- "FigS4B_mean_of_props"
list_of_stats[[paste0(contrast_name)]] <- curr_phen_long %>%
dplyr::select(-comp_time)
num_columns_for_plot <- 4
list_of_plots <- list()
#collect phen_to_plots
phen_to_plots <- data.frame()
## for each comp_time
for(i in unique(curr_phen_long$comp_time)){
phen_to_plot <- curr_phen_long %>%
dplyr::filter(comp_time == i) %>%
## add in labels
dplyr::arrange(compartment,variable) %>%
dplyr::mutate(label = paste0(signif(mean,2),"%"),
### add positions https://wilkelab.org/practicalgg/articles/bundestag_pie.html
cumsum_mean = cumsum(mean),
sum_mean = sum(mean),
end_angle = 2*pi*cumsum(mean)/sum(mean), # ending angle for each pie slice
start_angle = lag(end_angle, default = 0), # starting angle for each pie slice
mid_angle = 0.5*(start_angle + end_angle), # middle of each pie slice, for the text label
# horizontal and vertical justifications depend on whether we're to the left/right
# or top/bottom of the pie
hjust = ifelse(mid_angle > pi, 1, 0),
vjust = ifelse(mid_angle < pi/2 | mid_angle > 3*pi/2, 0, 1),
## add in positions for polar coord https://r-charts.com/part-whole/pie-chart-labels-outside-ggplot2/
# csum = rev(cumsum(rev(mean))),
# pos = mean/2 + lead(csum, 1),
# pos = if_else(is.na(pos), mean/2, pos),
### add explosion
focus = ifelse(mean < 10, 0.04, 0)) %>%
## remove rows with mean = 0
dplyr::filter(mean != 0)
if(dim(phen_to_plots)[1] == 0){
phen_to_plots <- phen_to_plot
}else{
phen_to_plots <- rbind(phen_to_plots,phen_to_plot)
}
### set colors
color_df <- data.frame("variable"=unique(curr_phen_long$variable),
"fill"=c("#D6D6D6","#0533FF","#D783FF","#73FCD6",
"#FFFC79","#038F01","#FF7E79","#FF2501"),
"color"=c("grey50","#0533FF","#D783FF","#59A38E",
"lightgoldenrod3","#038F01","#FF7E79","#FF2501"))
#### base plot
p1 <- phen_to_plot %>%
## plot
ggplot() +
scale_fill_manual(values=c(color_df$fill[which(color_df$variable %in% unique(phen_to_plot$variable))])) +
scale_color_manual(values=c(color_df$color[which(color_df$variable %in% unique(phen_to_plot$variable))])) +
ggforce::geom_arc_bar(
aes(x0 = 0, y0 = 0, r0 = 0, r = 0.7, amount = mean,
fill = variable, explode = focus),
data=phen_to_plot, stat = 'pie', size = 0.1)+
geom_text(
aes(
x = (.75+focus) * sin(mid_angle),
y = (.75+focus) * cos(mid_angle),
label = label, color = variable,
hjust = hjust, vjust = vjust
),
size = 10/.pt,
show.legend = FALSE ## no a in the legend
) +
coord_fixed() +
# geom_bar(stat="identity", width=1, color="black", size = 0.1) +
#coord_polar("y", start=0, direction = -1)+
theme_void() +
### add labels
# geom_text_repel(aes(x = 0, y = pos,
# label = label),
# size = 3, nudge_x = 1)+
ggtitle(str_replace_all(i,"__",", "))+
theme(plot.title = element_text(hjust = 0.5),
)
# plot(p1)
list_of_plots[[i]] <- p1 + theme(legend.position="none")
cat("\n")
cat(' \n \n') ### this is the key!
}
### get the legend
#https://stackoverflow.com/questions/13649473/add-a-common-legend-for-combined-ggplots
g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)}
### add empty filler
list_of_plots[[length(list_of_plots)+1]]<-ggplot()+theme_void()
list_of_plots[[length(list_of_plots)+2]]<-g_legend(p1) ### grabs from the l
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
GC populations
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig6"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("D3","D8","D22",
"D8","D22","D22"),
"time2"=c("D1","D1","D1",
"D3","D3","D8")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig6BE"
#contrast_name <- "Fig5D_Myeloid_vsD1"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 3
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
varcomp_to_plot <- c("LN__CD4_Tfh_Ki67pos","LN__GC_CD4_Tfh_Ki67pos",
"LN__gCD8_Tfh_Ki67pos","LN__B_Cells_Bcl6pos_Pct_B_cells")
### read in degs results
degs_df <- list_of_stats[[paste0(contrast_name)]] %>%
#### trim to only the vars in the figure panel
dplyr::filter(rowname %in% varcomp_to_plot)
#### for hm only want the vs D1
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("D1_log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(colnames(.),"-D1_log2FC")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
timepoint_factor_order <- c("D3","D8","D22")
############### annotation colors (set number of dirs)
annotation_colors <- list(Day = c(D3="grey40",D8="grey80",D22="grey90"),
dir1 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir2 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir3 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir4 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir5 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir6 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue")
)
names(annotation_colors[["Day"]]) <- timepoint_factor_order
names(annotation_colors) <- c("Day",paste0(str_replace_all(TimePoints_to_compare$name,"-","vs"),"_dir"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
mutate(Day=factor(contrast, timepoint_factor_order)) %>%
arrange(Day) %>%
column_to_rownames("contrast")
############## add row annotation with sig change for FC
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
annotation_row1 <- data.frame("rowname"=degs_df$rowname)
for(curr_contrast in rev(curr_contrasts)){
### trim to curr contrast
curr_pval_df <- degs_df[,c("rowname",colnames(degs_df)[grepl(curr_contrast,colnames(degs_df))])]
### rename column names to be generic
orig_names <- colnames(curr_pval_df)
simp_names <- ifelse(grepl("_",orig_names),str_after_last(orig_names,"_"),orig_names)
colnames(curr_pval_df) <- simp_names
curr_pval_df <- curr_pval_df %>%
dplyr::mutate(dir = ifelse(pval < as.double(params$pCutoff),
ifelse(log2FC > as.double(params$FCcutoff),"up", ifelse(log2FC > 0, "up_p_only",
ifelse(log2FC < (-1)*as.double(params$FCcutoff),"down",ifelse(log2FC < 0, "down_p_only","p_only")))),"not_sig"))
curr_df <- curr_pval_df[,c("rowname","dir")] %>%
dplyr::mutate(dir = factor(dir, c("up","up_p_only","not_sig","down_p_only","down")))
colnames(curr_df) <- c("rowname",paste0(str_remove_all(str_replace_all(curr_contrast,"-","vs"),"V4_"),"_dir"))
annotation_row1 <- dplyr::left_join(annotation_row1,curr_df, by = "rowname")
}
#### make rownames
annotation_row <- column_to_rownames(annotation_row1)
### sort to specified order
curr_ann_row <- annotation_row[varcomp_to_plot,]
mycols = diverging_hcl(n=100, palette="Blue-Red 3")
scale_range = 2
breakscale <- seq(-1*scale_range, scale_range, length=100)
p <- pheatmap(degs_df_for_hm[rownames(curr_ann_row),rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
#gaps_col = gaps_to_use,
border_color = NA,
cluster_rows = FALSE, cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
spearman correlation vs self
########## generate "dataset" format - wide, no metadata with columns = samples (including vax, timepoint, animal), rows = analytes
##### has analyte names as rownames AND as column labeled gene
df <- data_to_use %>%
### remove metadata
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "analyte", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL) %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "sampleName", values_from = "log2_value") %>%
dplyr::mutate(Gene=analyte) %>%
column_to_rownames("analyte") %>%
relocate(Gene)
analyte_columns <- rownames(df)
## generate metadata only
targetfile_full <- data_to_use %>%
### remove data columns
dplyr::select(-all_of(analyte_columns))
id_var_of_interest <- "sampleName"
var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
## designate covariates with Day related first, then id
covariates=c("Day","MacaqueID")
contrasts <- c("D3-D1","D8-D1","D22-D1")
contrast_name <- "Fig6C_GC"
### for CorrVsSelf
set_names_to_use <- c(paste0(c("D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC_withinCP"))
vars_for_selfCorr <- varcomp_to_plot
corr_tag_name <- "CorrVsSelf"
set_name_labels <- data.frame("set_name"=set_names_to_use,
"label"=c("D8vsD1 (top)", "D22vsD1 (bottom)"))
fullcombo_df_long <- data.frame()
for(set_name in set_names_to_use){
######## calculate correlation
cor_method <- "spearman"
curr_ds <- list_of_diff_counts[[paste0(contrast_name)]] %>%
### trim to only the vars desired
dplyr::filter(Gene %in% vars_for_selfCorr) %>%
column_to_rownames("Gene")
curr_df <- data.frame(t(curr_ds)) %>%
### trim to only tp of interest
rownames_to_column("sampleName") %>%
dplyr::filter(grepl(str_before_first(set_name,"_"),sampleName)) %>%
### be sure sorted
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
## generate cor matrix
cor_mat <- cor(as.matrix(signif(curr_df,7)), method = cor_method, use="pa")
## raw p
cor_test_mat <- data.frame(matrix(NA, nrow=dim(curr_df)[2], ncol=dim(curr_df)[2]))
rownames(cor_test_mat) <- colnames(curr_df)
colnames(cor_test_mat) <- colnames(curr_df)
for(curr1 in colnames(curr_df)) {
for(curr2 in colnames(curr_df)) {
cor_test_mat[curr1, curr2] <-
cor.test(as.matrix(signif(curr_df,7))[,curr1],
as.matrix(signif(curr_df,7))[,curr2],
method = cor_method, use="pa")$p.value
}
}
## don't bother adjusting since won't be symmetric
# cor_test_adj_mat <- cor_test_mat #duplicate dataframe to replace values
# for(curr_assay in colnames(curr_df1)) {
# cor_test_adj_mat[curr_assay,] <- p.adjust(cor_test_mat[curr_assay,], method = "BH")
# }
raw_or_adj <- "raw"
ann_pval_level <- 0.05
trim_pval_level <- 0.05
cat(" ", paste0(raw_or_adj," p < ", trim_pval_level),"\n")
cat("\n")
cat(" ", paste0("untrimmed = all analytes"),"\n")
cat("\n")
cat(" ", paste0("trimmed = only analytes with at least one association p < ",trim_pval_level),"\n")
cat("\n")
cat(" ", paste0("p-value for stars annotation < ",ann_pval_level),"\n")
cat("\n")
cat("\n")
cat(' \n \n') ### this is the key!
order <- "alphabet"
vars_ann <- data.frame(cor_mat) %>%
rownames_to_column() %>%
arrange(rowname)
ordered_vars_to_use <- vars_ann$rowname
### need this in case have only since variable being compared
cor_mat_for_plot <- as.matrix(cor_mat[ordered_vars_to_use,ordered_vars_to_use])
colnames(cor_mat_for_plot) <- ordered_vars_to_use
rownames(cor_mat_for_plot) <- ordered_vars_to_use
cor_test_mat_for_plot <- as.matrix(cor_test_mat[ordered_vars_to_use,ordered_vars_to_use])
rownames(cor_test_mat_for_plot) <- ordered_vars_to_use
colnames(cor_test_mat_for_plot) <- ordered_vars_to_use
#### set diag to 1, so won't be sig
diag(cor_test_mat_for_plot) <- 1
legend_ratio = 0.3
text_size = 0.3
cat("#### ", paste0(set_name_labels$label[which(set_name_labels$set_name == set_name)]),"\n")
cat("\n")
if(length(which(cor_test_mat_for_plot < as.numeric(ann_pval_level))) == 0){
corrplot::corrplot(cor_mat_for_plot, method = 'square',
is.corr = TRUE, #since symmetric
type = 'lower', diag = FALSE,
p.mat = cor_test_mat_for_plot,
sig.level = as.numeric(ann_pval_level), ## doesn't work if none are sig
#sig.level = 0.1,#set p < 0.05 to blank
insig='pch',
pch.cex = 1.2, pch.col = 'black',tl.col = 'black',tl.cex=text_size,
##legend
cl.ratio = legend_ratio, #cl.cex = 2,
#addCoef.col ='grey80',
#number.cex = 0.5, #add corr coeff to sig circles
order = order, ## CANNOT ORDER ASSYMMETRIC
col = rev(COL2('PuOr',1000)), ##make orange positive
col.lim = c(-1,1) ##set limits to be the same across plots
# title = vaccine_groups[i], mar=c(0,0,1,0) # http://stackoverflow.com/a/14754408/54964
)
cat("\n")
cat(' \n \n') ### this is the key!
}else{
#### otherwise can use the stars
corrplot::corrplot(cor_mat_for_plot, method = 'square',
is.corr = TRUE, #since symmetric
type = 'lower', diag = FALSE,
p.mat = cor_test_mat_for_plot,
sig.level = as.numeric(ann_pval_level),
insig='label_sig', #set p < 0.05 to blank
pch.cex = 1.2, pch.col = 'white',tl.col = 'black',tl.cex=text_size,
##legend
cl.ratio = legend_ratio, #cl.cex = 2,
#addCoef.col ='grey80',
#number.cex = 0.5, #add corr coeff to sig circles
order = order, ## CANNOT ORDER ASSYMMETRIC
col = rev(COL2('PuOr',1000)), ##make orange positive
col.lim = c(-1,1) ##set limits to be the same across plots
# title = vaccine_groups[i], mar=c(0,0,1,0) # http://stackoverflow.com/a/14754408/54964
)
cat("\n")
cat(' \n \n') ### this is the key!
}
cat("\n")
cat(' \n \n') ### this is the key!
######### save the stats
### write function for converting the cor_mat to long format df
corr_to_longSELF <- function(mat){
data.frame(mat) %>%
rownames_to_column("celltype1") %>%
tidyr::pivot_longer(!celltype1, names_to = "celltype2", values_to = "value") %>%
dplyr::mutate(celltype1_vs_2 = paste0(celltype1,"_vs_",celltype2))
}
#### convert to long
cor_long <- corr_to_longSELF(cor_mat)
if(raw_or_adj == "raw"){
p_long <- corr_to_longSELF(cor_test_mat)
}else{
p_long <- corr_to_longSELF(cor_test_adj_mat)
}
#### combine cor and p long dfs
curr_df_long <- dplyr::left_join(cor_long,p_long, by = c("celltype1","celltype2","celltype1_vs_2")) %>%
### rename values to be useful
dplyr::rename(corr_coef=value.x,p=value.y) %>%
### add column with set_name
dplyr::mutate(timepoint = paste0(str_replace_all(set_name,"-","vs")))
### add to fullcombo
fullcombo_df_long <- rbind(fullcombo_df_long,curr_df_long)
}
raw p < 0.05
untrimmed = all analytes
trimmed = only analytes with at least one association p < 0.05
p-value for stars annotation < 0.05
raw p < 0.05
untrimmed = all analytes
trimmed = only analytes with at least one association p < 0.05
p-value for stars annotation < 0.05
cat("\n")
cat(' \n \n') ### this is the key!
CorrName <- paste0(contrast_name,"__",corr_tag_name)
### add to stats master list
list_of_stats[[paste0(CorrName)]] <- fullcombo_df_long %>%
dplyr::relocate(celltype1, celltype2, corr_coef, p, timepoint)
##### set prior for log2 (0 for MSD, 1e-4 for cell pops, 1 for IHC)
prior_to_useIHC <- 1
##### select data
curr_df_long <- list_of_datasets[["IHC"]] %>%
### filter to only vars of interest = GC zone
dplyr::filter(zone == "GC") %>%
### add log10
dplyr::mutate(celltype_zone = paste0(celltype,"_",zone),
value_plus = value_raw + as.numeric(prior_to_useIHC),
value_log10 = log10(value_plus),
raw_value = NULL,
### factorize time
Day = factor(Day, c("D1","D3","D8","D22"))
)
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig6D_IHC"
#contrast_name <- "Fig5D_Myeloid_vsD1"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 2
IHC differences between timepoints (within a zone)
\(log10(value + 1)_{zoneXcelltypeY}\) ~ Time + (1|MacaqueID)
Contrasts within D3, D8 D22 are only for LION treatments, since CTRLs have just one timepoint
### if want to trim which p values on plot, these are are all
contrasts_to_keep <- c("vsD1","vsD3","vsD8")
treatments_to_keep <- c("LION","CTRL")
### Scatterplots by time/zone
#### read in full pvals
pval_df_full <- list_of_stats[[paste0(contrast_name,"_lmerTestByTime")]]
### trim to contrasts
pval_df_trimmed1 <- pval_df_full %>%
dplyr::filter(grepl(paste(contrasts_to_keep, collapse = '|'),contrast)) %>%
dplyr::filter(!is.na(lmer_est))
#### for each celltype "TAB", plot the median value (across replicates) for each animal/variable/zone/timepoint
cz_tabs <- unique(pval_df_trimmed1$variable)
y_scale_df <- data.frame("celltype"=c("CD4","CD8"),
"color"=c("#08F8F9","#F942F1"),
"max"=c(5,5),
"min"=c(0,0),
"to_plot"=c(11,11))
list_of_plots <- list()
### for each celltype/zone
for(curr_tab in cz_tabs){
## first trim pvals to the celltype "TAB"
pval_df_trimmed <- pval_df_trimmed1[grepl(curr_tab,pval_df_trimmed1$variable),]
curr_celltype <- str_before_first(curr_tab,"_")
curr_zone <- str_after_first(curr_tab,"_")
##### also trim the timepoint_comparison_df
curr_timepoint_comparison_df <- timepoint_comparison_df %>%
dplyr::filter(grepl(paste(contrasts_to_keep, collapse = '|'),name))
#### put pvals in format for plotting, tribbles
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$variable)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(curr_timepoint_comparison_df)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$variable == i & pval_df_trimmed$contrast == curr_timepoint_comparison_df[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
curr_timepoint_comparison_df[g,"time1"],curr_timepoint_comparison_df[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$variable == i & pval_df_trimmed$contrast == curr_timepoint_comparison_df[g,"name"]),"lmer_praw"],
pval_df_trimmed[which(pval_df_trimmed$variable == i & pval_df_trimmed$contrast == curr_timepoint_comparison_df[g,"name"]),"lmer_est"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(curr_tab)]] <- pval_df
}
#### prep phen_to_plot
#### add additional factorized vars
phen_to_plot <- curr_df_long %>%
### filter to only lion
dplyr::filter(treatment %in% treatments_to_keep) %>%
### filter variables
dplyr::filter(celltype_zone %in% pval_df_trimmed$variable) %>%
### factorize
dplyr::mutate(zone = factor(zone, c("GC","noGC","TCZ")),
celltype = factor(celltype, c("CD4","CD8"))) %>%
dplyr::arrange(MacaqueID)
plot_sig_only <- "no"
size_for_points <- 1
stroke_for_points <- 0.4
dodge_for_points <- 0.9
max_y <- y_scale_df$max[which(y_scale_df$celltype == curr_celltype)] #### set this to be consistent for each celltype
min_y <- y_scale_df$min[which(y_scale_df$celltype == curr_celltype)]
maxy_y_to_plot <- y_scale_df$to_plot[which(y_scale_df$celltype == curr_celltype)]
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(curr_timepoint_comparison_df),
length(which(pval_df_trimmed[i,] %>% dplyr::select(-variable) < as.double(params$pCutoff))) #### fix
)
color_df <- data.frame("zone"=c("GC","noGC","TCZ"),
"shape" = c(21, 22, 24))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = value_log10,
shape = zone,
fill = celltype#,
# group = MacaqueID
)) +
scale_fill_manual(values=y_scale_df$color[which(y_scale_df$celltype == curr_celltype)]) +
scale_color_manual(values=y_scale_df$color[which(y_scale_df$celltype == curr_celltype)]) +
scale_shape_manual(values=color_df$shape[which(color_df$zone == curr_zone)]) +
#scale_shape_manual(values=shapes_df$shape_for_plot[which(shapes_df$animal %in% unique(phen_to_plot$MacaqueID))]) +
#geom_line(color="grey",linetype = "dashed")+
geom_beeswarm(aes(
#fill=zone,
#color=zone,
shape=zone,
), fill="white", color="black",size = size_for_points, stroke = stroke_for_points, alpha = 0.9,corral = "random", corral.width = 0.5,
dodge.width = dodge_for_points)+ ## makes side by side groups
# geom_point(aes(
# fill=zone,
# color=zone,
# shape=zone,
# ), size = 3, stroke = 0.8, alpha = 0.9)+ ## makes side by side groups
#geom_text_repel() +
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot), breaks = seq(min_y,maxy_y_to_plot,1))+
#rremove("x.axis")+
theme_bw()+
labs(y = paste0(curr_celltype," ",curr_zone," for all animals (log10)")#,
#title = paste0(i)
)+
theme(legend.position="right", legend.box = "vertical",
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(),
plot.title = element_text(hjust = 0.5), #centers title
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black")
)
##### add p values
### for each zone
first_interval_to_use <- 0.05
interval_to_use <- 0.15
pvals_for_plot <- list_of_pvals[[curr_tab]] %>%
dplyr::mutate(zone=paste0(curr_zone),
celltype=paste0(curr_celltype),
MacaqueID="p") %>%
# add_x_position(x="Day") %>% ### does it in alphabetical order
rownames_to_column() %>%
dplyr::mutate(xmin = ifelse(group1 == "D1",1,
ifelse(group1 == "D3",2,3)),
xmax = ifelse(group2 == "D3",2,
ifelse(group2 == "D8",3,4
)),
y.position = c(max_y+first_interval_to_use*range_y+interval_to_use*as.numeric(rowname)*range_y))
for(curr_row in 1:nrow(pvals_for_plot)){
p <- p +
stat_pvalue_manual(pvals_for_plot[curr_row,],
label = "p",size = 3.5,tip.length = 0.01,
bracket.size = 0.2,
color = ifelse(pvals_for_plot$dir[curr_row] == "up","red",
ifelse(pvals_for_plot$dir[curr_row] == "down","blue",
ifelse(pvals_for_plot$dir[curr_row]== "not_sig","black","grey"))))
}
#### for lumped together add the median and quantiles
p <- p+
geom_violin(data = phen_to_plot, aes(x = Day, y = value_log10, fill = celltype), draw_quantiles = c(0.25, 0.5, 0.75), color = "grey40", alpha = 0.2,
inherit.aes = FALSE, scale = "width", adjust =0.5)+
# scale_color_manual(values = c("purple4","orange","grey60","black","black"))+
stat_summary(fun="mean", geom="crossbar", aes(color = "mean"), color = "black", linetype = "11", size = 0.3) +
stat_summary(fun="median", geom="crossbar",aes(color = "median"), color = "black", linetype = "solid", size = 0.3)
# plot(p)
cat('\n')
cat(' \n \n') ### this is the key!
cat('\n')
cat(' \n \n') ### this is the key!
# plot(p)
list_of_plots[[paste0(curr_tab)]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
varcomp_to_plot <- c("LN__CD4_Tfh_CXCR3neg_Ki67pos","LN__CD4_Tfh_CXCR3pos_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3neg_Ki67pos","LN__GC_CD4_Tfh_CXCR3pos_Ki67pos",
"LN__CD8_Tfh_CXCR3neg_Ki67pos","LN__CD8_Tfh_CXCR3pos_Ki67pos")
#### reset contrast name to full p values
contrast_name <- "Fig6BE"
### read in degs results
degs_df <- list_of_stats[[paste0(contrast_name)]] %>%
#### trim to only the vars in the figure panel
dplyr::filter(rowname %in% varcomp_to_plot)
#### for hm only want the vs D1
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("D1_log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(colnames(.),"-D1_log2FC")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
timepoint_factor_order <- c("D3","D8","D22")
############### annotation colors (set number of dirs)
annotation_colors <- list(Day = c(D3="grey40",D8="grey80",D22="grey90"),
dir1 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir2 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir3 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir4 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir5 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir6 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue")
)
names(annotation_colors[["Day"]]) <- timepoint_factor_order
names(annotation_colors) <- c("Day",paste0(str_replace_all(TimePoints_to_compare$name,"-","vs"),"_dir"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
mutate(Day=factor(contrast, timepoint_factor_order)) %>%
arrange(Day) %>%
column_to_rownames("contrast")
############## add row annotation with sig change for FC
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
annotation_row1 <- data.frame("rowname"=degs_df$rowname)
for(curr_contrast in rev(curr_contrasts)){
### trim to curr contrast
curr_pval_df <- degs_df[,c("rowname",colnames(degs_df)[grepl(curr_contrast,colnames(degs_df))])]
### rename column names to be generic
orig_names <- colnames(curr_pval_df)
simp_names <- ifelse(grepl("_",orig_names),str_after_last(orig_names,"_"),orig_names)
colnames(curr_pval_df) <- simp_names
curr_pval_df <- curr_pval_df %>%
dplyr::mutate(dir = ifelse(pval < as.double(params$pCutoff),
ifelse(log2FC > as.double(params$FCcutoff),"up", ifelse(log2FC > 0, "up_p_only",
ifelse(log2FC < (-1)*as.double(params$FCcutoff),"down",ifelse(log2FC < 0, "down_p_only","p_only")))),"not_sig"))
curr_df <- curr_pval_df[,c("rowname","dir")] %>%
dplyr::mutate(dir = factor(dir, c("up","up_p_only","not_sig","down_p_only","down")))
colnames(curr_df) <- c("rowname",paste0(str_remove_all(str_replace_all(curr_contrast,"-","vs"),"V4_"),"_dir"))
annotation_row1 <- dplyr::left_join(annotation_row1,curr_df, by = "rowname")
}
#### make rownames
annotation_row <- column_to_rownames(annotation_row1)
### sort to specified order
curr_ann_row <- annotation_row[varcomp_to_plot,]
mycols = diverging_hcl(n=100, palette="Blue-Red 3")
scale_range = 2
breakscale <- seq(-1*scale_range, scale_range, length=100)
p <- pheatmap(degs_df_for_hm[rownames(curr_ann_row),rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
#gaps_col = gaps_to_use,
border_color = NA,
cluster_rows = FALSE, cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
Spearman correlation between MSD D2vsD1 and GC populations
########## generate "dataset" format - wide, no metadata with columns = samples (including vax, timepoint, animal), rows = analytes
##### has analyte names as rownames AND as column labeled gene
df <- data_to_use %>%
### remove metadata
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "analyte", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL) %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "sampleName", values_from = "log2_value") %>%
dplyr::mutate(Gene=analyte) %>%
column_to_rownames("analyte") %>%
relocate(Gene)
analyte_columns <- rownames(df)
## generate metadata only
targetfile_full <- data_to_use %>%
### remove data columns
dplyr::select(-all_of(analyte_columns))
id_var_of_interest <- "sampleName"
var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
## designate covariates with Day related first, then id
covariates=c("Day","MacaqueID")
contrasts <- c("D3-D1","D8-D1","D22-D1")
contrast_name <- "Fig6FG_GC"
raw_or_adj <- "raw"
### which dataset levels/FC, trimmed to the tab
curr_ds <- list_of_diff_counts[[paste0(contrast_name)]] %>%
column_to_rownames("Gene")
###### MSD = D2-D1 only log2FC
curr_df2 <- data.frame(t(list_of_diff_counts[[paste0("Fig3C_MSD_V4_D2vsD1")]])) %>%
### make row 1 colnames
janitor::row_to_names(row_number = 1) %>%
### trim to only D2-D1
rownames_to_column("sampleName") %>%
dplyr::filter(grepl("D2-D1",sampleName)) %>%
### be sure sorted
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
set_names_to_use <- c(paste0(c("D3-D1","D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC"))
corr_tag_name <- "CorrVsMSD"
######## calculate correlation
cor_method <- "spearman"
CorrName <- paste0(contrast_name,"__",corr_tag_name)
#### generate empty dataframe for combo
fullcombo_df_long <- data.frame()
for(set_name in set_names_to_use){
curr_df1 <- data.frame(t(curr_ds)) %>%
### trim to only tp of interest
rownames_to_column("sampleName") %>%
dplyr::filter(grepl(str_before_first(set_name,"_"),sampleName)) %>%
### be sure sorted
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
## remove rows with all NAs
filter(if_any(everything(), ~ !is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
## generate cor matrix
cor_mat <- cor(as.matrix(signif(curr_df1,7)), as.matrix(signif(curr_df2,7)), method = cor_method, use="pa")
## raw p
cor_test_mat <- data.frame(matrix(NA, nrow=dim(curr_df1)[2], ncol=dim(curr_df2)[2]))
rownames(cor_test_mat) <- colnames(curr_df1)
colnames(cor_test_mat) <- colnames(curr_df2)
for(curr1 in colnames(curr_df1)) {
for(curr2 in colnames(curr_df2)) {
cor_test_mat[curr1, curr2] <-
cor.test(as.matrix(signif(curr_df1,7)[,curr1]),
as.matrix(signif(curr_df2,7)[,curr2]),
method = cor_method, use="pa")$p.value
}
}
## adj p for multiple comparisons by BH over cytokine assays
cor_test_adj_mat <- cor_test_mat #duplicate dataframe to replace values
for(curr_assay in colnames(curr_df1)) {
cor_test_adj_mat[curr_assay,] <- p.adjust(cor_test_mat[curr_assay,], method = "BH")
}
### write function for converting the cor_mat to long format df
corr_to_long <- function(mat){
data.frame(mat) %>%
rownames_to_column("celltype") %>%
tidyr::pivot_longer(!celltype, names_to = "assay", values_to = "value") %>%
dplyr::mutate(celltype_assay = paste0(celltype,"_vs_",assay))
}
#### convert to long
cor_long <- corr_to_long(cor_mat)
if(raw_or_adj == "raw"){
p_long <- corr_to_long(cor_test_mat)
}else{
p_long <- corr_to_long(cor_test_adj_mat)
}
#### combine cor and p long dfs
curr_df_long <- dplyr::left_join(cor_long,p_long, by = c("celltype","assay","celltype_assay")) %>%
### rename values to be useful
dplyr::rename(corr_coef=value.x,p=value.y) %>%
### add column with set_name
dplyr::mutate(timepoint = paste0(str_replace_all(set_name,"-","vs")))
### add to fullcombo
fullcombo_df_long <- rbind(fullcombo_df_long,curr_df_long)
}
### add to stats master list
list_of_stats[[paste0(CorrName)]] <- fullcombo_df_long %>%
dplyr::relocate(celltype, assay, corr_coef, p, timepoint)
barbara_order <- c("LN__CD4_Tfh_CXCR3pos_Ki67pos",
"LN__CD4_Tfh_CXCR3neg_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3pos_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3neg_Ki67pos",
"LN__CD8_Tfh_CXCR3pos_Ki67pos",
"LN__CD8_Tfh_CXCR3neg_Ki67pos",
"LN__Bcells_GC_BCL6pos_ki67pos_PctBCells")
## only for the Intervals
timepoints_to_use <- paste0(c("D3-D1","D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC")
TP_name <- "Intervals"
corr_tag_name <- "CorrVsMSD"
raw_or_adj <- "raw"
cor_method <- "spearman"
trim_pval_level <- 0.05
all_or_selected <- "selected"
assay_arrangement <- c("CCL2.MCP.1","CCL3.MIP.1a","CCL4.MIP.1b","CCL8.MCP.2",
"CXCL10.IP.10","CXCL11.ITAC","M.CSF",
"CCL26.Eotaxin.3","CCL13.MCP.4","CXCL12.SDF.1a",
"FLT3L","IL.7","CX3CL1.Fractalkine") ### if trimming/ordering assays
### set pos/neg/both
directions_to_use <- c("pos")
assays_to_exclude <- c("")
raw p < 0.05
#### make df for plotting
df_long1 <- fullcombo_df_long %>%
### generate dir column
dplyr::mutate(dir = ifelse(p < trim_pval_level,
ifelse(corr_coef > 0, "pos","neg"),
"notSig"),
### break out compartment
compartment = str_before_first(celltype,"__"),
celltype = str_after_first(celltype,"__"),
celltype_assay = str_after_first(celltype_assay,"__")) %>%
### remove the corr and p vals for pivoting
dplyr::select(-c(corr_coef,p))
###### set up inital dataframe
## trim to only sig
bub_df2 <- df_long1[which(df_long1$dir %in% directions_to_use ),] %>%
### also remove any analytes_to_exclude
dplyr::filter(!assay %in% assays_to_exclude) %>%
dplyr::mutate(timepoint = factor(str_before_first(timepoint,"_"),str_replace_all(str_before_first(timepoints_to_use,"_"),"-","vs")),
dir = factor(dir, directions_to_use),
##### add compartment to celltype
comp_celltype = paste0(compartment,"__",celltype))
#### remove timepoint suffix from celltype (only needed for vs Abs/NAbs)
cleaned_celltypes <- bub_df2$comp_celltype
for(tu in timepoints_to_use){
cleaned_celltypes <- str_remove_all(cleaned_celltypes,paste0("__",str_replace_all(tu,"-","vs")))
}
bub_df2$celltype <- cleaned_celltypes
#### if selected tab combos, keep cell pops on the x-axis
bub_df_bothordered <- bub_df2 %>%
#### trim to desired assays
dplyr::filter(assay %in% assay_arrangement) %>%
### remove factor levels for assay
dplyr::mutate(assay = factor(as.character(assay),assay_arrangement),
### also breakout for celltype
celltype_only = str_after_first(celltype,"__"),
compartment = factor(compartment, c("blood","LN"))) %>%
dplyr::arrange(assay) %>%
#### trim to barbara_order celltypes
dplyr::filter(celltype %in% barbara_order) %>%
## lock in factor levels for assay (rev for y axis since want it to be reversed)
dplyr::mutate(assay = factor(assay, rev(unique(assay))),
### now get celltype order
celltype = factor(celltype, intersect(barbara_order,unique(celltype)))) %>%
#### now arrange by timepoint to be able to see bulls eyes
dplyr::arrange(desc(timepoint))
###### reversed axes!!!
bba <-
ggplot(bub_df_bothordered, aes(x = celltype, y = assay))+
geom_point(aes(size = timepoint, color = timepoint))+
#scale_alpha_manual(values = alpha_values)+
scale_size_manual(values = c(3,6,10))+
scale_color_manual(values = c("#262626","#7F7F7F","#BFBFBF"))+
theme_bw()+
### make legend symbol bigger & fix color
# guides(color = guide_legend(override.aes = list(size = 5)),
# size = guide_legend(override.aes = list(color = leg_color)))+
## remove gridlines
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
## rotate axis labels
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot(bba)
cat("\n")
cat(' \n \n') ### this is the key!
### last plot in loop doesn't appear unless add this
cat("\n")
cat(' \n \n') ### this is the key!
###### need to fix why this plots in Rstudio but not renedered
Spearman correlation between Abs & GC populations
Compare each cell population to Ab levels combining the following “paired” timepoints:
Cell pop D1 & Ab D1
Cell pop D3/4 & Ab D8
Cell pop D8 & Ab D15
Cell pop D22 & Ab D22
curr_figure_name <- "Antibody"
##### select data
data_Abs <- list_of_datasets[["Abs"]] %>%
### filter to only vars of interest (removes NAb)
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))])))
########## generate "dataset" format for Abs - wide, no metadata with columns = samples (including vax, timepoint, animal), rows = analytes
##### has analyte names as rownames AND as column labeled gene
df <- data_Abs %>%
### remove metadata
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "analyte", values_to = "raw_value") %>%
dplyr::mutate(log10_value = log10(raw_value), ### no prior
raw_value = NULL,
### remove raw from analyte
analyte = str_replace_all(analyte,"_raw","_log10")) %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "sampleName", values_from = "log10_value") %>%
dplyr::mutate(Gene=analyte) %>%
column_to_rownames("analyte") %>%
relocate(Gene)
analyte_columns <- rownames(df)
## generate metadata only
targetfile_full <- data_Abs[,c("sampleName","MacaqueID","Day")]
id_var_of_interest <- "sampleName"
var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
## designate covariates with Day related first, then id
covariates=c("Day","MacaqueID")
contrasts <- c("D8-D1","D15-D1","D22-D1")
contrast_name <- "Fig6G_Abs"
vars_for_AbCorr <-c("LN__CD4_Tfh_CXCR3neg_Ki67pos",
"LN__CD4_Tfh_CXCR3pos_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3neg_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3pos_Ki67pos",
"LN__CD8_Tfh_CXCR3neg_Ki67pos",
"LN__CD8_Tfh_CXCR3pos_Ki67pos",
"LN__Bcells_GC_BCL6pos_ki67pos_PctBCells")
### load GC pops
contrast_name <- "Fig6FG_GC"
curr_ds <- list_of_diff_counts[[paste0(contrast_name)]] %>%
### trim to only the vars desired
dplyr::filter(Gene %in% vars_for_AbCorr) %>%
column_to_rownames("Gene")
### load Abs
contrast_name <- "Fig6G_Abs"
curr_ds_abs <- list_of_diff_counts[[paste0(contrast_name)]] %>%
column_to_rownames("Gene") %>%
t() %>%
data.frame() %>%
#### remove rows with all NA
filter(if_any(everything(), ~ !is.na(.))) %>%
### fix names
rownames_to_column("Animal_TP") %>%
dplyr::mutate(#Animal_TP = str_replace_all(Animal_TP,"-","vs"),
#### also unify with cell pops
Animal_TP = str_replace_all(Animal_TP,"D8-D1","D3-D1"),
Animal_TP = str_replace_all(Animal_TP,"D15-D1","D8-D1"))
#### set new for the stats
contrast_name <- "Fig6G_GC"
corr_tag_name <- "CorrVsAbPaired4TPs"
raw_or_adj <- "raw"
set_name <- "Intervals"
######## calculate correlation
cor_method <- "spearman"
#### rows = animal (trimmed to tp "set_name"), columns = vars
curr_df1 <- data.frame(t(curr_ds)) %>%
## remove rows with all NAs
filter(if_any(everything(), ~ !is.na(.))) %>%
### be sure sorted
rownames_to_column("sampleName") %>%
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
### rows = animal, columns = vars, including day/interval
curr_df2 <- data.frame(curr_ds_abs) %>%
### be sure sorted
dplyr::arrange(Animal_TP) %>%
column_to_rownames("Animal_TP") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
CorrName <- paste0(contrast_name,"__",corr_tag_name)
## generate cor matrix
cor_mat <- cor(as.matrix(signif(curr_df1,7)), as.matrix(signif(curr_df2,7)), method = cor_method, use="pa")
## raw p
cor_test_mat <- data.frame(matrix(NA, nrow=dim(curr_df1)[2], ncol=dim(curr_df2)[2]))
rownames(cor_test_mat) <- colnames(curr_df1)
colnames(cor_test_mat) <- colnames(curr_df2)
for(curr1 in colnames(curr_df1)) {
for(curr2 in colnames(curr_df2)) {
cor_test_mat[curr1, curr2] <-
cor.test(as.matrix(signif(curr_df1,7)[,curr1]),
as.matrix(signif(curr_df2,7)[,curr2]),
method = cor_method, use="pa")$p.value
}
}
## adj p for multiple comparisons by BH over cytokine assays
cor_test_adj_mat <- cor_test_mat #duplicate dataframe to replace values
for(curr_assay in colnames(curr_df1)) {
cor_test_adj_mat[curr_assay,] <- p.adjust(cor_test_mat[curr_assay,], method = "BH")
}
### write function for converting the cor_mat to long format df
corr_to_long <- function(mat){
data.frame(mat) %>%
rownames_to_column("celltype") %>%
tidyr::pivot_longer(!celltype, names_to = "assay", values_to = "value") %>%
dplyr::mutate(celltype_assay = paste0(celltype,"_vs_",assay))
}
#### convert to long
cor_long <- corr_to_long(cor_mat)
if(raw_or_adj == "raw"){
p_long <- corr_to_long(cor_test_mat)
}else{
p_long <- corr_to_long(cor_test_adj_mat)
}
#### combine cor and p long dfs
curr_df_long <- dplyr::left_join(cor_long,p_long, by = c("celltype","assay","celltype_assay")) %>%
### rename values to be useful
dplyr::rename(corr_coef=value.x,p=value.y) %>%
### add column with set_name
dplyr::mutate(timepoint = paste0(str_replace_all(set_name,"-","vs")))
### add to stats master list
list_of_stats[[paste0(CorrName)]] <- curr_df_long %>%
dplyr::relocate(celltype, assay, corr_coef, p, timepoint)
raw_or_adj <- "raw"
ann_pval_level <- 0.05
trim_pval_level <- 0.05
cat("\n")
cat(' \n \n') ### this is the key!
######### plotting
vars_ann <- data.frame(cor_mat) %>%
rownames_to_column() %>%
arrange(rowname)
ordered_vars_to_use <- vars_ann$rowname
ABs_ann <- data.frame(t(cor_mat)) %>%
rownames_to_column() %>%
arrange(rowname)
ordered_ABs_to_use <- ABs_ann$rowname
### need this in case have only since variable being compared
cor_mat_for_plot <- as.matrix(cor_mat[ordered_vars_to_use,ordered_ABs_to_use])
colnames(cor_mat_for_plot) <- ordered_ABs_to_use
if(raw_or_adj == "raw"){
cor_test_mat_for_plot <- as.matrix(cor_test_mat[ordered_vars_to_use,ordered_ABs_to_use])
}else{
cor_test_mat_for_plot <- as.matrix(cor_test_adj_mat[ordered_vars_to_use,ordered_ABs_to_use])
}
rownames(cor_test_mat_for_plot) <- ordered_vars_to_use
colnames(cor_test_mat_for_plot) <- ordered_ABs_to_use
legend_ratio = 0.9
text_size = 0.8
star_size <- 3
#### plot with the star annotation
corrplot::corrplot(cor_mat_for_plot, method = 'square',
is.corr = FALSE, #since not symmetric
p.mat = cor_test_mat_for_plot,
sig.level = as.numeric(ann_pval_level),
insig='label_sig', #set p < 0.05 to blank
pch.cex = star_size, pch.col = 'white',tl.col = 'black',tl.cex=text_size,
##legend
cl.ratio = legend_ratio, #cl.cex = 2,
#addCoef.col ='grey80',
#number.cex = 0.5, #add corr coeff to sig circles
#order = order, ## CANNOT ORDER ASSYMMETRIC
col = rev(COL2('PuOr',1000)), ##make orange positive
col.lim = c(-1,1) ##set limits to be the same across plots
# title = vaccine_groups[i], mar=c(0,0,1,0) # http://stackoverflow.com/a/14754408/54964
)
cat("\n")
cat(' \n \n') ### this is the key!
contrast_name <- "Fig6BE"
data_to_useFig6 <- data_to_use
### trim to only vars to plot
variables_ordered <- c("LN__CD4_Tfh_Ki67pos","LN__GC_CD4_Tfh_Ki67pos",
"LN__gCD8_Tfh_Ki67pos","LN__B_Cells_Bcl6pos_Pct_B_cells")
data_to_use <- data_to_useFig6 %>%
dplyr::select(c(sampleName,MacaqueID,Day,
!!!rlang::syms(variables_ordered))
)
num_columns_for_plot <- 4
list_of_stats[["FigS5AC"]] <- data.frame("FigS5A"="stats are in the Fig6BE tab")
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),variables_ordered),
Day = factor(str_after_first(sampleName,"__"),timepoints_x_order),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_beeswarm(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
##### set prior for log2 (0 for MSD, 1e-4 for cell pops, 1 for IHC)
prior_to_useIHC <- 1
##### select data
curr_df_long <- list_of_datasets[["IHC"]] %>%
### filter to only vars of interest = GC zone
dplyr::filter(zone != "GC") %>%
### add log10
dplyr::mutate(celltype_zone = paste0(celltype,"_",zone),
value_plus = value_raw + as.numeric(prior_to_useIHC),
value_log10 = log10(value_plus),
raw_value = NULL,
### factorize time
Day = factor(Day, c("D1","D3","D8","D22"))
)
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "FigS5B_IHC"
#contrast_name <- "Fig5D_Myeloid_vsD1"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 2
IHC differences between timepoints (within a zone)
\(log10(value + 1)_{zoneXcelltypeY}\) ~ Time + (1|MacaqueID)
Contrasts within D3, D8 D22 are only for LION treatments, since CTRLs have just one timepoint
### if want to trim which p values on plot, these are are all
contrasts_to_keep <- c("vsD1","vsD3","vsD8")
treatments_to_keep <- c("LION","CTRL")
### Scatterplots by time/zone
#### read in full pvals
pval_df_full <- list_of_stats[[paste0(contrast_name,"_lmerTestByTime")]]
### trim to contrasts
pval_df_trimmed1 <- pval_df_full %>%
dplyr::filter(grepl(paste(contrasts_to_keep, collapse = '|'),contrast)) %>%
dplyr::filter(!is.na(lmer_est))
#### for each celltype "TAB", plot the median value (across replicates) for each animal/variable/zone/timepoint
cz_tabs <- unique(pval_df_trimmed1$variable)
y_scale_df <- data.frame("celltype"=c("CD4","CD8"),
"color"=c("#08F8F9","#F942F1"),
"max"=c(5,5),
"min"=c(0,0),
"to_plot"=c(11,11))
list_of_plots <- list()
### for each celltype/zone
for(curr_tab in cz_tabs){
## first trim pvals to the celltype "TAB"
pval_df_trimmed <- pval_df_trimmed1[grepl(curr_tab,pval_df_trimmed1$variable),]
curr_celltype <- str_before_first(curr_tab,"_")
curr_zone <- str_after_first(curr_tab,"_")
##### also trim the timepoint_comparison_df
curr_timepoint_comparison_df <- timepoint_comparison_df %>%
dplyr::filter(grepl(paste(contrasts_to_keep, collapse = '|'),name))
#### put pvals in format for plotting, tribbles
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$variable)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(curr_timepoint_comparison_df)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$variable == i & pval_df_trimmed$contrast == curr_timepoint_comparison_df[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
curr_timepoint_comparison_df[g,"time1"],curr_timepoint_comparison_df[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$variable == i & pval_df_trimmed$contrast == curr_timepoint_comparison_df[g,"name"]),"lmer_praw"],
pval_df_trimmed[which(pval_df_trimmed$variable == i & pval_df_trimmed$contrast == curr_timepoint_comparison_df[g,"name"]),"lmer_est"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(curr_tab)]] <- pval_df
}
#### prep phen_to_plot
#### add additional factorized vars
phen_to_plot <- curr_df_long %>%
### filter to only lion
dplyr::filter(treatment %in% treatments_to_keep) %>%
### filter variables
dplyr::filter(celltype_zone %in% pval_df_trimmed$variable) %>%
### factorize
dplyr::mutate(zone = factor(zone, c("GC","noGC","TCZ")),
celltype = factor(celltype, c("CD4","CD8"))) %>%
dplyr::arrange(MacaqueID)
plot_sig_only <- "no"
size_for_points <- 1
stroke_for_points <- 0.4
dodge_for_points <- 0.9
max_y <- y_scale_df$max[which(y_scale_df$celltype == curr_celltype)] #### set this to be consistent for each celltype
min_y <- y_scale_df$min[which(y_scale_df$celltype == curr_celltype)]
maxy_y_to_plot <- y_scale_df$to_plot[which(y_scale_df$celltype == curr_celltype)]
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(curr_timepoint_comparison_df),
length(which(pval_df_trimmed[i,] %>% dplyr::select(-variable) < as.double(params$pCutoff))) #### fix
)
color_df <- data.frame("zone"=c("GC","noGC","TCZ"),
"shape" = c(21, 22, 24))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = value_log10,
shape = zone,
fill = celltype#,
# group = MacaqueID
)) +
scale_fill_manual(values=y_scale_df$color[which(y_scale_df$celltype == curr_celltype)]) +
scale_color_manual(values=y_scale_df$color[which(y_scale_df$celltype == curr_celltype)]) +
scale_shape_manual(values=color_df$shape[which(color_df$zone == curr_zone)]) +
#scale_shape_manual(values=shapes_df$shape_for_plot[which(shapes_df$animal %in% unique(phen_to_plot$MacaqueID))]) +
#geom_line(color="grey",linetype = "dashed")+
geom_beeswarm(aes(
#fill=zone,
#color=zone,
shape=zone,
), fill="white", color="black",size = size_for_points, stroke = stroke_for_points, alpha = 0.9,corral = "random", corral.width = 0.5,
dodge.width = dodge_for_points)+ ## makes side by side groups
# geom_point(aes(
# fill=zone,
# color=zone,
# shape=zone,
# ), size = 3, stroke = 0.8, alpha = 0.9)+ ## makes side by side groups
#geom_text_repel() +
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot), breaks = seq(min_y,maxy_y_to_plot,1))+
#rremove("x.axis")+
theme_bw()+
labs(y = paste0(curr_celltype," ",curr_zone," for all animals (log10)")#,
#title = paste0(i)
)+
theme(legend.position="right", legend.box = "vertical",
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(),
plot.title = element_text(hjust = 0.5), #centers title
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black")
)
##### add p values
### for each zone
first_interval_to_use <- 0.05
interval_to_use <- 0.15
pvals_for_plot <- list_of_pvals[[curr_tab]] %>%
dplyr::mutate(zone=paste0(curr_zone),
celltype=paste0(curr_celltype),
MacaqueID="p") %>%
# add_x_position(x="Day") %>% ### does it in alphabetical order
rownames_to_column() %>%
dplyr::mutate(xmin = ifelse(group1 == "D1",1,
ifelse(group1 == "D3",2,3)),
xmax = ifelse(group2 == "D3",2,
ifelse(group2 == "D8",3,4
)),
y.position = c(max_y+first_interval_to_use*range_y+interval_to_use*as.numeric(rowname)*range_y))
for(curr_row in 1:nrow(pvals_for_plot)){
p <- p +
stat_pvalue_manual(pvals_for_plot[curr_row,],
label = "p",size = 3.5,tip.length = 0.01,
bracket.size = 0.2,
color = ifelse(pvals_for_plot$dir[curr_row] == "up","red",
ifelse(pvals_for_plot$dir[curr_row] == "down","blue",
ifelse(pvals_for_plot$dir[curr_row]== "not_sig","black","grey"))))
}
#### for lumped together add the median and quantiles
p <- p+
geom_violin(data = phen_to_plot, aes(x = Day, y = value_log10, fill = celltype), draw_quantiles = c(0.25, 0.5, 0.75), color = "grey40", alpha = 0.2,
inherit.aes = FALSE, scale = "width", adjust =0.5)+
# scale_color_manual(values = c("purple4","orange","grey60","black","black"))+
stat_summary(fun="mean", geom="crossbar", aes(color = "mean"), color = "black", linetype = "11", size = 0.3) +
stat_summary(fun="median", geom="crossbar",aes(color = "median"), color = "black", linetype = "solid", size = 0.3)
# plot(p)
cat('\n')
cat(' \n \n') ### this is the key!
cat('\n')
cat(' \n \n') ### this is the key!
# plot(p)
list_of_plots[[paste0(curr_tab)]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
contrast_name <- "Fig6BE"
### trim to only vars to plot
variables_ordered <- c("LN__CD4_Tfh_CXCR3pos_Ki67pos",
"LN__CD4_Tfh_CXCR3neg_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3pos_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3neg_Ki67pos",
"LN__CD8_Tfh_CXCR3pos_Ki67pos",
"LN__CD8_Tfh_CXCR3neg_Ki67pos")
data_to_use <- data_to_useFig6 %>%
dplyr::select(c(sampleName,MacaqueID,Day,
!!!rlang::syms(variables_ordered))
)
num_columns_for_plot <- 3
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),variables_ordered),
Day = factor(str_after_first(sampleName,"__"),timepoints_x_order),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = Day, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_beeswarm(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 10), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
Selected scatterplots
########## Reload data from Fig 6F
### which dataset levels/FC, trimmed to the tab
curr_ds <- list_of_diff_counts[[paste0("Fig6FG_GC")]] %>%
column_to_rownames("Gene")
###### MSD = D2-D1 only log2FC
curr_df2 <- data.frame(t(list_of_diff_counts[[paste0("Fig3C_MSD_V4_D2vsD1")]])) %>%
### make row 1 colnames
janitor::row_to_names(row_number = 1) %>%
### trim to only D2-D1
rownames_to_column("sampleName") %>%
dplyr::filter(grepl("D2-D1",sampleName)) %>%
### be sure sorted
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
selected_pairs_df <- data.frame("var_x"=c("LN__CD4_Tfh_CXCR3pos_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3pos_Ki67pos",
"LN__CD4_Tfh_CXCR3pos_Ki67pos",
"LN__GC_CD4_Tfh_CXCR3pos_Ki67pos",
"LN__CD8_Tfh_CXCR3pos_Ki67pos",
"LN__Bcells_GC_BCL6pos_ki67pos_PctBCells",
"LN__CD8_Tfh_CXCR3pos_Ki67pos",
"LN__Bcells_GC_BCL6pos_ki67pos_PctBCells"),
"Day_x"=c("D8-D1",
"D3-D1",
"D3-D1",
"D8-D1",
"D8-D1",
"D8-D1",
"D8-D1",
"D8-D1"
),
"var_y"=c("IL.7",
"FLT3L",
"CXCL10.IP.10",
"CXCL11.ITAC",
"CXCL10.IP.10",
"FLT3L",
"CXCL11.ITAC",
"CXCL12.SDF.1a"),
"Day_y"=c(rep("D2-D1",8))
)
num_columns_for_plot <- 2
### pivot curr_ds to separate timepoint from animal
df_long <- curr_ds %>%
rownames_to_column("comp_var") %>%
tidyr::pivot_longer(!comp_var, names_to = "sampleName", values_to = "log2FC") %>%
### break out MacaqueID and Day
dplyr::mutate(Day = str_after_first(sampleName,"__"),
Day = str_replace_all(Day,"-","vs"),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
### add in the second df
dplyr::full_join(data.frame(t(curr_df2)) %>%
rownames_to_column("comp_var") %>%
tidyr::pivot_longer(!comp_var,
names_to = "sampleName", values_to = "log2FC") %>%
### break out MacaqueID and Day
dplyr::mutate(Day = str_after_first(sampleName,"__"),
Day = str_replace_all(Day,"\\.","vs"),
Day = str_replace_all(Day,"-","vs"),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL),
by = c("comp_var","Day","MacaqueID","log2FC")
)
list_of_plots <- list()
## for each selected pair
for(i in 1:nrow(selected_pairs_df)){
curr_x <- selected_pairs_df$var_x[i]
curr_Day_x <- str_replace_all(selected_pairs_df$Day_x[i],"-","vs")
curr_y <- selected_pairs_df$var_y[i]
curr_Day_y <- str_replace_all(selected_pairs_df$Day_y[i],"-","vs")
phen_to_plot <- df_long %>%
dplyr::filter(comp_var %in% c(curr_x,curr_y)) %>%
dplyr::filter(Day %in% c(curr_Day_x,curr_Day_y)) %>%
### combine var and time
dplyr::mutate(comp_var_day = paste0(comp_var,"___",Day),
comp_var = NULL, Day = NULL) %>%
tidyr::pivot_wider(names_from = comp_var_day, values_from = log2FC)
#### base plot
p <- ggplot(phen_to_plot, aes(x = !!rlang::sym(paste0(curr_x,"___",curr_Day_x)),
y = !!rlang::sym(paste0(curr_y,"___",curr_Day_y)))) +
geom_point()+
stat_cor(method = "spearman")+
xlab(paste0(curr_x,"\n",curr_Day_x))+
ylab(paste0(curr_y,"\n",curr_Day_y))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title = element_text(size = 10)) ### make smaller to fit
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
CD8 Tcm, Tem, Tfc
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig7A"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl(curr_figure_name,colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
curr_phen_long <- data_to_use %>%
### get average for each day/compartment
# first make longer
dplyr::select(-sampleName) %>%
tidyr::pivot_longer(cols = contains("__"), names_to = "comp_var", values_to = "value") %>%
dplyr::mutate(compartment = str_before_first(comp_var,"__"),
variable = str_after_last(comp_var,"__"),
Day = factor(Day, c("D1","D3","D8","D22"))) %>%
### collect proportion of 100% and means
# collect total
group_by(compartment, Day, MacaqueID) %>%
dplyr::mutate(total_per_comp_time = sum(value)) %>%
ungroup() %>%
# calculate prop within each animal
group_by(compartment, variable, Day, MacaqueID) %>%
dplyr::mutate(prop = value / total_per_comp_time *100) %>%
ungroup() %>%
# take mean across animals
group_by(compartment, variable, Day) %>%
dplyr::summarise(mean = mean(prop)) %>%
# add comp_time
dplyr::mutate(comp_time = paste0(compartment,"__",Day),
variable = factor(variable, c("CM","EM","Naive","cTfc","Tcf1","Tcf2"))) %>%
# sort
ungroup() %>%
dplyr::arrange(compartment,Day,variable)
### To name output files
contrast_name <- "Fig7A_mean_of_props"
list_of_stats[[paste0(contrast_name)]] <- curr_phen_long %>%
dplyr::select(-comp_time)
num_columns_for_plot <- 4
list_of_plots <- list()
## for each comp_time
for(i in unique(curr_phen_long$comp_time)){
phen_to_plot <- curr_phen_long %>%
dplyr::filter(comp_time == i)
### set colors
color_df <- data.frame("variable"=unique(curr_phen_long$variable),
"fill"=c("#021993","#FF2500","#E9E9E9","#72FDFF","#8F8C01","#FF9400"),
"color"=c("white","white","black","grey40","black","white"))
#### base plot
p <- phen_to_plot %>%
## add in labels
dplyr::arrange(variable) %>%
dplyr::mutate(label = paste0(variable,"\n",signif(mean,2))) %>%
## plot
ggplot(aes(x ="", y = mean, fill=variable)) +
scale_fill_manual(values=c(color_df$fill[which(color_df$variable %in% unique(phen_to_plot$variable))])) +
scale_color_manual(values=c(color_df$color[which(color_df$variable %in% unique(phen_to_plot$variable))])) +
geom_bar(stat="identity", width=1, color="grey") +
coord_polar("y", start=0, direction = -1)+
theme_void() +
### add labels
geom_text(aes(label = label, color = variable), cex = 3,
position = position_stack(vjust = 0.5))+
ggtitle(str_replace_all(i,"__",", "))+
theme(legend.position="none",plot.title = element_text(hjust = 0.5))
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig7"
##### select data
data_to_use <- list_of_datasets[["CellPops"]] %>%
### filter to only vars of interes
dplyr::select(c(sampleName,MacaqueID,Day,!!!rlang::syms(colnames(.)[grepl("Fig7_",colnames(.))]))) %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__")))) %>%
### remove the control rows that are only there for Fig4E
dplyr::filter(!grepl("control",MacaqueID))
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("D3","D8","D22",
"D8","D22","D22"),
"time2"=c("D1","D1","D1",
"D3","D3","D8")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig7BC"
#contrast_name <- "Fig5D_Myeloid_vsD1"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 3
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
varcomp_to_plot <- c("blood__CD8_CM_Ki67pos","LN__CD8_CM_Ki67pos",
"blood__CD8_EM_Ki67pos","LN__CD8_EM_Ki67pos",
"blood__CD8_CM_GzmBpos_Ki67pos","LN__CD8_CM_GzmBpos_Ki67pos",
"blood__CD8_EM_GzmBpos_Ki67pos","LN__CD8_EM_GzmBpos_Ki67pos")
### read in degs results
degs_df <- list_of_stats[[paste0(contrast_name)]] %>%
#### trim to only the vars in the figure panel
dplyr::filter(rowname %in% varcomp_to_plot)
#### for hm only want the vs D1
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("D1_log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(colnames(.),"-D1_log2FC")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
timepoint_factor_order <- c("D3","D8","D22")
############### annotation colors (set number of dirs)
annotation_colors <- list(Day = c(D3="grey40",D8="grey80",D22="grey90"),
dir1 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir2 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir3 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir4 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir5 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir6 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue")
)
names(annotation_colors[["Day"]]) <- timepoint_factor_order
names(annotation_colors) <- c("Day",paste0(str_replace_all(TimePoints_to_compare$name,"-","vs"),"_dir"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
mutate(Day=factor(contrast, timepoint_factor_order)) %>%
arrange(Day) %>%
column_to_rownames("contrast")
############## add row annotation with sig change for FC
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
annotation_row1 <- data.frame("rowname"=degs_df$rowname)
for(curr_contrast in rev(curr_contrasts)){
### trim to curr contrast
curr_pval_df <- degs_df[,c("rowname",colnames(degs_df)[grepl(curr_contrast,colnames(degs_df))])]
### rename column names to be generic
orig_names <- colnames(curr_pval_df)
simp_names <- ifelse(grepl("_",orig_names),str_after_last(orig_names,"_"),orig_names)
colnames(curr_pval_df) <- simp_names
curr_pval_df <- curr_pval_df %>%
dplyr::mutate(dir = ifelse(pval < as.double(params$pCutoff),
ifelse(log2FC > as.double(params$FCcutoff),"up", ifelse(log2FC > 0, "up_p_only",
ifelse(log2FC < (-1)*as.double(params$FCcutoff),"down",ifelse(log2FC < 0, "down_p_only","p_only")))),"not_sig"))
curr_df <- curr_pval_df[,c("rowname","dir")] %>%
dplyr::mutate(dir = factor(dir, c("up","up_p_only","not_sig","down_p_only","down")))
colnames(curr_df) <- c("rowname",paste0(str_remove_all(str_replace_all(curr_contrast,"-","vs"),"V4_"),"_dir"))
annotation_row1 <- dplyr::left_join(annotation_row1,curr_df, by = "rowname")
}
#### make rownames
annotation_row <- column_to_rownames(annotation_row1)
### sort to specified order
curr_ann_row <- annotation_row[varcomp_to_plot,]
mycols = diverging_hcl(n=100, palette="Blue-Red 3")
scale_range = 2
breakscale <- seq(-1*scale_range, scale_range, length=100)
p <- pheatmap(degs_df_for_hm[rownames(curr_ann_row),rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
#gaps_col = gaps_to_use,
border_color = NA,
cluster_rows = FALSE, cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
varcomp_to_plot <- c("blood__CD8_Tfh_Ki67pos","LN__CD8_Tfh_Ki67pos",
"blood__CD8_Tfh_GzmBpos","LN__CD8_Tfh_GzmBpos",
"blood__CD8_Tfh_GzmBpos_Ki67pos","LN__CD8_Tfh_GzmBpos_Ki67pos")
### read in degs results
degs_df <- list_of_stats[[paste0(contrast_name)]] %>%
#### trim to only the vars in the figure panel
dplyr::filter(rowname %in% varcomp_to_plot)
#### for hm only want the vs D1
degs_df_for_hm <- degs_df[,c("rowname",colnames(degs_df)[grepl("D1_log2FC",colnames(degs_df))])] %>%
column_to_rownames() %>%
### remove rows with all na
filter(if_any(everything(), ~ !is.na(.))) %>%
`colnames<-`(str_remove_all(colnames(.),"-D1_log2FC")) %>%
## sort
rownames_to_column() %>%
arrange(rowname) %>%
column_to_rownames()
timepoint_factor_order <- c("D3","D8","D22")
############### annotation colors (set number of dirs)
annotation_colors <- list(Day = c(D3="grey40",D8="grey80",D22="grey90"),
dir1 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir2 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir3 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir4 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir5 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue"),
dir6 = c(up="red",up_p_only="lightpink",not_sig="grey95",down_p_only="skyblue",down="blue")
)
names(annotation_colors[["Day"]]) <- timepoint_factor_order
names(annotation_colors) <- c("Day",paste0(str_replace_all(TimePoints_to_compare$name,"-","vs"),"_dir"))
######### annotation for columns
annotation_col <- data.frame("contrast"= colnames(degs_df_for_hm)) %>%
mutate(Day=factor(contrast, timepoint_factor_order)) %>%
arrange(Day) %>%
column_to_rownames("contrast")
############## add row annotation with sig change for FC
### generate contrasts
curr_contrasts <- c()
for(npt in 1:nrow(TimePoints_to_compare)){
cont <- paste0(TimePoints_to_compare$time1[npt],"-",TimePoints_to_compare$time2[npt])
curr_contrasts <- c(curr_contrasts,cont)
}
annotation_row1 <- data.frame("rowname"=degs_df$rowname)
for(curr_contrast in rev(curr_contrasts)){
### trim to curr contrast
curr_pval_df <- degs_df[,c("rowname",colnames(degs_df)[grepl(curr_contrast,colnames(degs_df))])]
### rename column names to be generic
orig_names <- colnames(curr_pval_df)
simp_names <- ifelse(grepl("_",orig_names),str_after_last(orig_names,"_"),orig_names)
colnames(curr_pval_df) <- simp_names
curr_pval_df <- curr_pval_df %>%
dplyr::mutate(dir = ifelse(pval < as.double(params$pCutoff),
ifelse(log2FC > as.double(params$FCcutoff),"up", ifelse(log2FC > 0, "up_p_only",
ifelse(log2FC < (-1)*as.double(params$FCcutoff),"down",ifelse(log2FC < 0, "down_p_only","p_only")))),"not_sig"))
curr_df <- curr_pval_df[,c("rowname","dir")] %>%
dplyr::mutate(dir = factor(dir, c("up","up_p_only","not_sig","down_p_only","down")))
colnames(curr_df) <- c("rowname",paste0(str_remove_all(str_replace_all(curr_contrast,"-","vs"),"V4_"),"_dir"))
annotation_row1 <- dplyr::left_join(annotation_row1,curr_df, by = "rowname")
}
#### make rownames
annotation_row <- column_to_rownames(annotation_row1)
### sort to specified order
curr_ann_row <- annotation_row[varcomp_to_plot,]
mycols = diverging_hcl(n=100, palette="Blue-Red 3")
scale_range = 2
breakscale <- seq(-1*scale_range, scale_range, length=100)
p <- pheatmap(degs_df_for_hm[rownames(curr_ann_row),rownames(annotation_col)], scale="none", col = mycols,
breaks=breakscale, clustering_method="ward.D2",
#gaps_col = gaps_to_use,
border_color = NA,
cluster_rows = FALSE, cluster_cols = FALSE,
annotation_col = annotation_col,show_colnames = FALSE,
annotation_row = curr_ann_row,
annotation_colors = annotation_colors
)
p
cat("\n")
cat(' \n \n') ### this is the key!
Spearman correlation between MSD D2vsD1 and CD8 T CM/EM populations
########## generate "dataset" format - wide, no metadata with columns = samples (including vax, timepoint, animal), rows = analytes
##### has analyte names as rownames AND as column labeled gene
df <- data_to_use %>%
### remove metadata
dplyr::select(-c(MacaqueID,Day)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "analyte", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL) %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "sampleName", values_from = "log2_value") %>%
dplyr::mutate(Gene=analyte) %>%
column_to_rownames("analyte") %>%
relocate(Gene)
analyte_columns <- rownames(df)
## generate metadata only
targetfile_full <- data_to_use %>%
### remove data columns
dplyr::select(-all_of(analyte_columns))
id_var_of_interest <- "sampleName"
var_of_interest <- "Day" ### matches groups(=timepoints) in contrasts, no MacaqueID
## designate covariates with Day related first, then id
covariates=c("Day","MacaqueID")
contrasts <- c("D3-D1","D8-D1","D22-D1")
contrast_name <- "Fig7DE_CD8T"
raw_or_adj <- "raw"
### which dataset levels/FC, trimmed to the tab
curr_ds <- list_of_diff_counts[[paste0(contrast_name)]] %>%
column_to_rownames("Gene")
###### MSD = D2-D1 only log2FC
curr_df2 <- data.frame(t(list_of_diff_counts[[paste0("Fig3C_MSD_V4_D2vsD1")]])) %>%
### make row 1 colnames
janitor::row_to_names(row_number = 1) %>%
### trim to only D2-D1
rownames_to_column("sampleName") %>%
dplyr::filter(grepl("D2-D1",sampleName)) %>%
### be sure sorted
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
set_names_to_use <- c(paste0(c("D3-D1","D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC"))
corr_tag_name <- "CorrVsMSD"
######## calculate correlation
cor_method <- "spearman"
CorrName <- paste0(contrast_name,"__",corr_tag_name)
#### generate empty dataframe for combo
fullcombo_df_long <- data.frame()
for(set_name in set_names_to_use){
curr_df1 <- data.frame(t(curr_ds)) %>%
### trim to only tp of interest
rownames_to_column("sampleName") %>%
dplyr::filter(grepl(str_before_first(set_name,"_"),sampleName)) %>%
### be sure sorted
dplyr::arrange(sampleName) %>%
column_to_rownames("sampleName") %>%
#### then remove columns with all NA
select_if(~ !all(is.na(.))) %>%
## remove rows with all NAs
filter(if_any(everything(), ~ !is.na(.))) %>%
### be sure numeric
mutate_if(is.character, as.numeric)
## generate cor matrix
cor_mat <- cor(as.matrix(signif(curr_df1,7)), as.matrix(signif(curr_df2,7)), method = cor_method, use="pa")
## raw p
cor_test_mat <- data.frame(matrix(NA, nrow=dim(curr_df1)[2], ncol=dim(curr_df2)[2]))
rownames(cor_test_mat) <- colnames(curr_df1)
colnames(cor_test_mat) <- colnames(curr_df2)
for(curr1 in colnames(curr_df1)) {
for(curr2 in colnames(curr_df2)) {
cor_test_mat[curr1, curr2] <-
cor.test(as.matrix(signif(curr_df1,7)[,curr1]),
as.matrix(signif(curr_df2,7)[,curr2]),
method = cor_method, use="pa")$p.value
}
}
## adj p for multiple comparisons by BH over cytokine assays
cor_test_adj_mat <- cor_test_mat #duplicate dataframe to replace values
for(curr_assay in colnames(curr_df1)) {
cor_test_adj_mat[curr_assay,] <- p.adjust(cor_test_mat[curr_assay,], method = "BH")
}
### write function for converting the cor_mat to long format df
corr_to_long <- function(mat){
data.frame(mat) %>%
rownames_to_column("celltype") %>%
tidyr::pivot_longer(!celltype, names_to = "assay", values_to = "value") %>%
dplyr::mutate(celltype_assay = paste0(celltype,"_vs_",assay))
}
#### convert to long
cor_long <- corr_to_long(cor_mat)
if(raw_or_adj == "raw"){
p_long <- corr_to_long(cor_test_mat)
}else{
p_long <- corr_to_long(cor_test_adj_mat)
}
#### combine cor and p long dfs
curr_df_long <- dplyr::left_join(cor_long,p_long, by = c("celltype","assay","celltype_assay")) %>%
### rename values to be useful
dplyr::rename(corr_coef=value.x,p=value.y) %>%
### add column with set_name
dplyr::mutate(timepoint = paste0(str_replace_all(set_name,"-","vs")))
### add to fullcombo
fullcombo_df_long <- rbind(fullcombo_df_long,curr_df_long)
}
### add to stats master list
list_of_stats[[paste0(CorrName)]] <- fullcombo_df_long %>%
dplyr::relocate(celltype, assay, corr_coef, p, timepoint)
barbara_order <- c("blood__CD8_CM_Ki67pos",
"blood__CD8_CM_GzmBpos",
"LN__CD8_CM_Ki67pos",
"LN__CD8_CM_GzmBpos",
"blood__CD8_EM_Ki67pos",
"blood__CD8_EM_GzmBpos",
"LN__CD8_EM_Ki67pos",
"LN__CD8_EM_GzmBpos")
## only for the Intervals
timepoints_to_use <- paste0(c("D3-D1","D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC")
TP_name <- "Intervals"
corr_tag_name <- "CorrVsMSD"
raw_or_adj <- "raw"
cor_method <- "spearman"
trim_pval_level <- 0.05
all_or_selected <- "selected"
assay_arrangement <- c("CCL2.MCP.1","CCL3.MIP.1a","CCL4.MIP.1b","CCL8.MCP.2",
"CXCL10.IP.10","CXCL11.ITAC","IL.15","M.CSF",
"CCL26.Eotaxin.3","CCL13.MCP.4","CXCL12.SDF.1a",
"FLT3L","IL.7","CX3CL1.Fractalkine","IFN.g") ### if trimming/ordering assays
### set pos/neg/both
directions_to_use <- c("pos")
assays_to_exclude <- c("CCL27.CTACK","IL.5")
raw p < 0.05
#### make df for plotting
df_long1 <- fullcombo_df_long %>%
### generate dir column
dplyr::mutate(dir = ifelse(p < trim_pval_level,
ifelse(corr_coef > 0, "pos","neg"),
"notSig"),
### break out compartment
compartment = str_before_first(celltype,"__"),
celltype = str_after_first(celltype,"__"),
celltype_assay = str_after_first(celltype_assay,"__")) %>%
### remove the corr and p vals for pivoting
dplyr::select(-c(corr_coef,p))
###### set up inital dataframe
## trim to only sig
bub_df2 <- df_long1[which(df_long1$dir %in% directions_to_use ),] %>%
### also remove any analytes_to_exclude
dplyr::filter(!assay %in% assays_to_exclude) %>%
dplyr::mutate(timepoint = factor(str_before_first(timepoint,"_"),str_replace_all(str_before_first(timepoints_to_use,"_"),"-","vs")),
dir = factor(dir, directions_to_use),
##### add compartment to celltype
comp_celltype = paste0(compartment,"__",celltype))
#### remove timepoint suffix from celltype (only needed for vs Abs/NAbs)
cleaned_celltypes <- bub_df2$comp_celltype
for(tu in timepoints_to_use){
cleaned_celltypes <- str_remove_all(cleaned_celltypes,paste0("__",str_replace_all(tu,"-","vs")))
}
bub_df2$celltype <- cleaned_celltypes
#### if selected tab combos, keep cell pops on the x-axis
bub_df_bothordered <- bub_df2 %>%
#### trim to desired assays
dplyr::filter(assay %in% assay_arrangement) %>%
### remove factor levels for assay
dplyr::mutate(assay = factor(as.character(assay),assay_arrangement),
### also breakout for celltype
celltype_only = str_after_first(celltype,"__"),
compartment = factor(compartment, c("blood","LN"))) %>%
dplyr::arrange(assay) %>%
#### trim to barbara_order celltypes
dplyr::filter(celltype %in% barbara_order) %>%
## lock in factor levels for assay (rev for y axis since want it to be reversed)
dplyr::mutate(assay = factor(assay, rev(unique(assay))),
### now get celltype order
celltype = factor(celltype, intersect(barbara_order,unique(celltype)))) %>%
#### now arrange by timepoint to be able to see bulls eyes
dplyr::arrange(desc(timepoint))
###### reversed axes!!!
bba <-
ggplot(bub_df_bothordered, aes(x = celltype, y = assay))+
geom_point(aes(size = timepoint, color = timepoint))+
#scale_alpha_manual(values = alpha_values)+
scale_size_manual(values = c(3,6,10))+
scale_color_manual(values = c("#262626","#7F7F7F","#BFBFBF"))+
theme_bw()+
### make legend symbol bigger & fix color
# guides(color = guide_legend(override.aes = list(size = 5)),
# size = guide_legend(override.aes = list(color = leg_color)))+
## remove gridlines
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
## rotate axis labels
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot(bba)
cat("\n")
cat(' \n \n') ### this is the key!
### last plot in loop doesn't appear unless add this
cat("\n")
cat(' \n \n') ### this is the key!
###### need to fix why this plots in Rstudio but not renedered
Spearman correlation between MSD D2vsD1 and CD8 Tfh populations
barbara_order <- c("blood__CD8_Tfh_Ki67pos",
"blood__CD8_Tfh_GzmBpos",
"LN__CD8_Tfh_Ki67pos",
"LN__CD8_Tfh_GzmBpos")
## only for the Intervals
timepoints_to_use <- paste0(c("D3-D1","D8-D1","D22-D1"),"_",str_after_first(params$log2_or_raw,"value_"),"FC")
TP_name <- "Intervals"
corr_tag_name <- "CorrVsMSD"
raw_or_adj <- "raw"
cor_method <- "spearman"
trim_pval_level <- 0.05
all_or_selected <- "selected"
assay_arrangement <- c("CCL2.MCP.1","CCL3.MIP.1a","CCL4.MIP.1b","CCL8.MCP.2",
"CXCL10.IP.10","CXCL11.ITAC","IL.15","M.CSF",
"CCL26.Eotaxin.3","CCL19.MIP.3b","FLT3L","CCL13.MCP.4","CXCL12.SDF.1a",
"CX3CL1.Fractalkine") ### set pos/neg/both
directions_to_use <- c("pos")
assays_to_exclude <- c("CCL27.CTACK","IL.5")
raw p < 0.05
#### make df for plotting
df_long1 <- fullcombo_df_long %>%
### generate dir column
dplyr::mutate(dir = ifelse(p < trim_pval_level,
ifelse(corr_coef > 0, "pos","neg"),
"notSig"),
### break out compartment
compartment = str_before_first(celltype,"__"),
celltype = str_after_first(celltype,"__"),
celltype_assay = str_after_first(celltype_assay,"__")) %>%
### remove the corr and p vals for pivoting
dplyr::select(-c(corr_coef,p))
###### set up inital dataframe
## trim to only sig
bub_df2 <- df_long1[which(df_long1$dir %in% directions_to_use ),] %>%
### also remove any analytes_to_exclude
dplyr::filter(!assay %in% assays_to_exclude) %>%
dplyr::mutate(timepoint = factor(str_before_first(timepoint,"_"),str_replace_all(str_before_first(timepoints_to_use,"_"),"-","vs")),
dir = factor(dir, directions_to_use),
##### add compartment to celltype
comp_celltype = paste0(compartment,"__",celltype))
#### remove timepoint suffix from celltype (only needed for vs Abs/NAbs)
cleaned_celltypes <- bub_df2$comp_celltype
for(tu in timepoints_to_use){
cleaned_celltypes <- str_remove_all(cleaned_celltypes,paste0("__",str_replace_all(tu,"-","vs")))
}
bub_df2$celltype <- cleaned_celltypes
#### if selected tab combos, keep cell pops on the x-axis
bub_df_bothordered <- bub_df2 %>%
#### trim to desired assays
dplyr::filter(assay %in% assay_arrangement) %>%
### remove factor levels for assay
dplyr::mutate(assay = factor(as.character(assay),assay_arrangement),
### also breakout for celltype
celltype_only = str_after_first(celltype,"__"),
compartment = factor(compartment, c("blood","LN"))) %>%
dplyr::arrange(assay) %>%
#### trim to barbara_order celltypes
dplyr::filter(celltype %in% barbara_order) %>%
## lock in factor levels for assay (rev for y axis since want it to be reversed)
dplyr::mutate(assay = factor(assay, rev(unique(assay))),
### now get celltype order
celltype = factor(celltype, intersect(barbara_order,unique(celltype)))) %>%
#### now arrange by timepoint to be able to see bulls eyes
dplyr::arrange(desc(timepoint))
###### reversed axes!!!
bba <-
ggplot(bub_df_bothordered, aes(x = celltype, y = assay))+
geom_point(aes(size = timepoint, color = timepoint))+
#scale_alpha_manual(values = alpha_values)+
scale_size_manual(values = c(3,6,10))+
scale_color_manual(values = c("#262626","#7F7F7F","#BFBFBF"))+
theme_bw()+
### make legend symbol bigger & fix color
# guides(color = guide_legend(override.aes = list(size = 5)),
# size = guide_legend(override.aes = list(color = leg_color)))+
## remove gridlines
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
## rotate axis labels
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot(bba)
cat("\n")
cat(' \n \n') ### this is the key!
### last plot in loop doesn't appear unless add this
cat("\n")
cat(' \n \n') ### this is the key!
###### need to fix why this plots in Rstudio but not renedered
AIM assay
##### set prior for log2 (0 for MSD, 1e-4 for cell pops)
prior_to_use <- 1e-4
curr_figure_name <- "Fig8"
##### select data
data_to_use <- list_of_datasets[["AIM"]] %>%
### remove fig name from colnames
`colnames<-`(c(str_remove_all(colnames(.),paste0(curr_figure_name,"__"))))
### generate dataframe: rows = samples, columns = variables and group for comparison (= TimePoint)
curr_df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,treat)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
dplyr::mutate(log2_value = log2(raw_value + as.double(prior_to_use)),
raw_value = NULL,
### combine variable + timepoint
MacaqueID = str_before_first(sampleName,"__"),
TimePoint = str_after_first(sampleName,"__"), ### includes the vaccine
sampleName = NULL
)
### select comparisons to make: time1 - time2
TimePoints_to_compare <- data.frame(
"time1"=c("Spike"),
"time2"=c("medium")) %>%
dplyr::mutate(name = paste0(time1,"-",time2))
## set raw or adj p-val
raw_or_adj <- "_pval" #"_adjpval"
### To name output files
contrast_name <- "Fig8ABDE"
### for scatterplots
plot_sig_only <- "no"
num_columns_for_plot <- 3
#### try glm
curr_df <- curr_df_long %>%
### make wider into dataset format
tidyr::pivot_wider(names_from = "variable", values_from = "log2_value")
## prep pvaldf
p_value_df2 <- data.frame("rowname" = unique(curr_df_long$variable))
for (npt in 1:dim(TimePoints_to_compare)[1]){
### select animals with both timepoints
animal_df <- as.data.frame.matrix(table(curr_df$MacaqueID, curr_df$TimePoint)) %>%
dplyr::select(!!!rlang::syms(c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt]))) %>%
## rename to generic
dplyr::rename(time2=1, time1=2) %>%
dplyr::mutate(combo = (time2+time1)) %>%
dplyr::filter(combo ==2) %>%
rownames_to_column()
#trim to only the 2 TimePoints, will remove vars without these TimePoint
curr_df2 <- curr_df %>%
# only animals with both timepoints
dplyr::filter(MacaqueID %in% animal_df$rowname) %>%
# only rows with time1 or time2
dplyr::filter(TimePoint %in% c(TimePoints_to_compare$time1[npt],TimePoints_to_compare$time2[npt])) %>%
## remove variables with all na
dplyr::select(where(function(x) !all(is.na(x)))) %>%
#### prep for glm
dplyr::mutate(TimePoint_f = factor(TimePoint, c(TimePoints_to_compare$time2[npt],TimePoints_to_compare$time1[npt])),
ID_f = factor(MacaqueID)) %>%
dplyr::arrange(ID_f)
### select variables with both TimePoints
curr_vars <- colnames(curr_df2)[-which(colnames(curr_df2) %in% c("MacaqueID","TimePoint","TimePoint_f","ID_f"))]
######## get rid of columns that don't have data for both timepoints
######### only for columns that are not all NAs
NA_columns_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),] %>% keep(~all(is.na(.x))) %>% names #https://www.statology.org/r-find-columns-with-all-na/
NA_columns_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),] %>% keep(~all(is.na(.x))) %>% names
####### also remove columns with all zeros for both timepoints
allZero_columns <- which(colSums(curr_df2[,intersect(colnames(curr_df2),curr_vars)]) == 0) %>% names
####### finally, remove columns with no animals that have both timepoints
noOverlap_columns <- c()
for(curr_v in intersect(colnames(curr_df2),curr_vars)){
animals_tp1 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time1[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
animals_tp2 <- curr_df2[which(curr_df2$TimePoint == TimePoints_to_compare$time2[npt]),c("MacaqueID",curr_v)] %>%
dplyr::filter(!is.na(!!rlang::sym(curr_v)))
if(length(intersect(animals_tp1$MacaqueID,animals_tp2$MacaqueID)) == 0){
noOverlap_columns <- c(noOverlap_columns,curr_v)
}else{
noOverlap_columns <- noOverlap_columns
}
}
nonNA_columns <- setdiff(setdiff(setdiff(setdiff(curr_vars, NA_columns_tp1), NA_columns_tp2),allZero_columns),noOverlap_columns)
### if there are any columns with data for both TimePoints, compute for those columns
if(length(nonNA_columns)>0){
new_variable_mat_prep <- data.frame(as.matrix(data.matrix(curr_df2[,nonNA_columns])))
colnames(new_variable_mat_prep) <- nonNA_columns
new_variable_mat <- new_variable_mat_prep %>%
### remove if all have identical value
select(where(~n_distinct(.) > 1)) %>%
as.matrix(.)
##### glm not giving sensical for BAMA ---> need to log10
curr_p_raw <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Pr(>|W|)'])
curr_p_adj <- p.adjust(curr_p_raw, method = 'BH')
curr_est <- apply(new_variable_mat, 2, function(x) coef(summary(geeglm(scale(x) ~ TimePoint_f, id=ID_f, data = curr_df2, family="gaussian", corstr="exchangeable")))[2,'Estimate'])
#### no singular errors = good!
curr_p_df <- data.frame(curr_est, curr_p_raw, curr_p_adj) %>%
rownames_to_column()
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
###### if there are no columns with nonNA data for this TimePoint, then add nothing
}else{
curr_p_df <- data.frame("rowname"=curr_vars,
"est"=NA,
"pval"=NA,
"adjpval"=NA)
colnames(curr_p_df) <- c("rowname",
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_log2FC"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_pval"),
paste0(TimePoints_to_compare$time1[npt],"-",
TimePoints_to_compare$time2[npt],"_adjpval"))
}
p_value_df2 <- dplyr::full_join(p_value_df2, curr_p_df, by="rowname")
}
### save stats to list
list_of_stats[[paste0(contrast_name)]] <- p_value_df2
varcomp_to_plot <- colnames(list_of_datasets[["AIM"]])[grepl("panel8A",colnames(list_of_datasets[["AIM"]]))] %>%
str_remove_all("Fig8__")
num_columns_for_plot <- 1
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,treat)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### trim to only the vars in this panel
dplyr::filter(variable %in% varcomp_to_plot) %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),varcomp_to_plot),
treat = factor(str_after_first(sampleName,"__"),c("medium","Spike")),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = treat, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_point(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
geom_line(aes(group = MacaqueID), color = "grey30")+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 6), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
varcomp_to_plot <- colnames(list_of_datasets[["AIM"]])[grepl("panel8B",colnames(list_of_datasets[["AIM"]]))]%>%
str_remove_all("Fig8__")
num_columns_for_plot <- 1
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,treat)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### trim to only the vars in this panel
dplyr::filter(variable %in% varcomp_to_plot) %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),varcomp_to_plot),
treat = factor(str_after_first(sampleName,"__"),c("medium","Spike")),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = treat, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_point(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
geom_line(aes(group = MacaqueID), color = "grey30")+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 6), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
varcomp_to_plot <- colnames(list_of_datasets[["AIM"]])[grepl("panel8C",colnames(list_of_datasets[["AIM"]]))]%>%
str_remove_all("Fig8__")
num_columns_for_plot <- 1
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,treat)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### trim to only the vars in this panel
dplyr::filter(variable %in% varcomp_to_plot) %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),varcomp_to_plot),
treat = factor(str_after_first(sampleName,"__"),c("medium","Spike")),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = treat, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_point(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
geom_line(aes(group = MacaqueID), color = "grey30")+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 6), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
varcomp_to_plot <- colnames(list_of_datasets[["AIM"]])[grepl("panel8D",colnames(list_of_datasets[["AIM"]]))]%>%
str_remove_all("Fig8__")
num_columns_for_plot <- 3
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,treat)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### trim to only the vars in this panel
dplyr::filter(variable %in% varcomp_to_plot) %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),varcomp_to_plot),
treat = factor(str_after_first(sampleName,"__"),c("medium","Spike")),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = treat, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_point(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
geom_line(aes(group = MacaqueID), color = "grey30")+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 6), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
varcomp_to_plot <- colnames(list_of_datasets[["AIM"]])[grepl("panel8E",colnames(list_of_datasets[["AIM"]]))]%>%
str_remove_all("Fig8__")
num_columns_for_plot <- 3
df_long <- data_to_use %>%
dplyr::select(-c(MacaqueID,treat)) %>%
### make longer to log2
tidyr::pivot_longer(cols=!sampleName, names_to = "variable", values_to = "raw_value") %>%
### trim to only the vars in this panel
dplyr::filter(variable %in% varcomp_to_plot) %>%
### break out compartment and variable
dplyr::mutate(comp_var = variable,
compartment = factor(str_before_first(variable,"__"),c("blood","LN")),
variable = factor(str_after_first(variable,"__"),varcomp_to_plot),
treat = factor(str_after_first(sampleName,"__"),c("medium","Spike")),
MacaqueID = str_before_first(sampleName,"__"),
sampleName = NULL) %>%
dplyr::arrange(compartment, variable)
### set up p-values: rowname (comp_var), contrast, log2FC, pval, adjpval
pval_df_trimmed <- list_of_stats[[paste0(contrast_name)]] %>%
###make longer
tidyr::pivot_longer(cols=!rowname, names_to = "contrast_test", values_to = "value") %>%
dplyr::mutate(test = str_after_first(contrast_test,"_"),
contrast = str_before_first(contrast_test,"_"),
contrast_test = NULL) %>%
tidyr::pivot_wider(names_from = "test",values_from = "value") %>%
data.frame()
### for each comp_var
list_of_pvals <- list()
for (i in unique(pval_df_trimmed$rowname)){
pval_df <- tibble::tribble(~group1, ~group2, ~p, ~est, ~dir)
for (g in 1:nrow(TimePoints_to_compare)){
#### only if have the timepoints
if(dim(pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),])[1] > 0){
### tribble
curr_df <- tibble::tribble(~group1, ~group2, ~p, ~est,
TimePoints_to_compare[g,"time1"],TimePoints_to_compare[g,"time2"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"pval"],
pval_df_trimmed[which(pval_df_trimmed$rowname == i & pval_df_trimmed$contrast == TimePoints_to_compare[g,"name"]),"log2FC"]
) %>%
### add dir
dplyr::mutate(dir = ifelse(p < params$pCutoff,
ifelse(est > 0, "up","down"),
"not_sig"),
### set sig figs
p = signif(p,3))
pval_df <- add_row(pval_df, curr_df)
}else{
pval_df <- pval_df
}
}
list_of_pvals[[paste0(i)]] <- pval_df
}
list_of_plots <- list()
## for each comp var
for(i in unique(df_long$comp_var)){
phen_to_plot <- df_long %>%
dplyr::filter(comp_var == i)
# cat("##### ",paste0(i," "),"\n")
# cat("\n")
max_y <- max(phen_to_plot$raw_value, na.rm=TRUE)
min_y <- min(phen_to_plot$raw_value, na.rm=TRUE)
range_y <- max_y-min_y
num_contrasts <- ifelse(plot_sig_only =="no",
nrow(TimePoints_to_compare),
length(which(list_of_pvals[[paste0(i)]][,"p"] < 0.05))
)
maxy_y_to_plot <- ifelse(plot_sig_only =="no",
max_y*(1+0.1*(num_contrasts+1)),
max_y*(1+0.1*(num_contrasts+1))
)
# color_df <- data.frame("variable"=levels(phen_to_plot$variable),
#
# "fill"=c("#929292","#008F00","#008EF1"))
#### base plot
p <- ggplot(phen_to_plot, aes(x = treat, y = raw_value)) +
# scale_color_manual(values=c(color_df$fill[which(color_df$variable == str_after_first(i,"__"))])) +
geom_point(#aes(color=variable),
size = 2, shape=16, alpha=0.8, cex = 3)+
geom_line(aes(group = MacaqueID), color = "grey30")+
scale_y_continuous(limits = c(min_y*(0.8),maxy_y_to_plot)
)+
labs(y = paste0(i))+
theme_bw()+
theme(legend.position="none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), axis.line = element_line(colour = "black"),
axis.title.y = element_text(size = 6), ### make smaller to fit
axis.title.x = element_blank())
box_pval_cut <- 0.05
##### add p values
### if plotting all p
if(plot_sig_only == "no"){
for(l in 1:num_contrasts){
p <- p +
stat_pvalue_manual(list_of_pvals[[i]][l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y,
bracket.size = 0.2, tip.length = 0)
}
}else{
# if there are any sig contrasts
if(num_contrasts > 0){
## trim to sig contrasts
sig_pvals <- list_of_pvals[[i]][which(list_of_pvals[[i]][,"p"] < 0.05),] %>%
### add name
dplyr::left_join(TimePoints_to_compare, by =c("group1","group2"))
for(l in 1:nrow(sig_pvals)){
# if(pval_combined[which(rownames(pval_combined) == i),paste0(sig_comp_pairs_df$name[l])] < as.double(box_pval_cut)){
p <- p +
stat_pvalue_manual(sig_pvals[l,], label = "p",size = 2.5,
y.position = max_y+0.1*l*range_y, bracket.size = 0.2, tip.length = 0)
# }else{
# p <- p
# }
}
}else{
p <- p
}
}
# plot(p)
list_of_plots[[i]] <- p
cat("\n")
cat(' \n \n') ### this is the key!
}
do.call("grid.arrange",c(list_of_plots, ncol=num_columns_for_plot))
##save
openxlsx::write.xlsx(list_of_datasets,file.path(outputTABLES_dir,paste0("list_of_RAW_datasets.xlsx")))
#### add with button
xfun::embed_file(file.path(outputTABLES_dir,paste0("list_of_RAW_datasets.xlsx")), text = "Excel spreadsheet of RAW datasets")
Excel spreadsheet of RAW datasets
##save
openxlsx::write.xlsx(list_of_diff_counts,file.path(outputTABLES_dir,paste0("list_of_log2FC.xlsx")))
#### add with button
xfun::embed_file(file.path(outputTABLES_dir,paste0("list_of_log2FC.xlsx")), text = "Excel spreadsheet of log2 FC")
Excel spreadsheet of log2 FC
##save
openxlsx::write.xlsx(list_of_stats,file.path(outputTABLES_dir,paste0("list_of_stats.xlsx")))
#### add with button
xfun::embed_file(file.path(outputTABLES_dir,paste0("list_of_stats.xlsx")), text = "Excel spreadsheet of stats")
Excel spreadsheet of stats
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lmerTest_3.1-3 lme4_1.1-35.3 Matrix_1.7-0
## [4] geepack_1.3.10 ggalluvial_0.12.5 dendextend_1.17.1
## [7] flextable_0.9.6 corrplot_0.92 edgeR_4.2.0
## [10] limma_3.60.0 EnhancedVolcano_1.22.0 colorspace_2.1-0
## [13] RColorBrewer_1.1-3 dendsort_0.3.4 strex_2.0.0
## [16] ggplotify_0.1.2 plotly_4.10.4 ggrepel_0.9.5
## [19] ggbeeswarm_0.7.2 downloadthis_0.3.3 DT_0.33
## [22] gridExtra_2.3 org.Hs.eg.db_3.19.1 AnnotationDbi_1.66.0
## [25] IRanges_2.38.0 S4Vectors_0.42.0 Biobase_2.64.0
## [28] BiocGenerics_0.50.0 gtsummary_1.7.2 kableExtra_1.4.0
## [31] broom_1.0.5 knitr_1.46 enrichR_3.2
## [34] eulerr_7.0.2 ggpubr_0.6.0 pheatmap_1.0.12
## [37] cowplot_1.1.3 lubridate_1.9.3 forcats_1.0.0
## [40] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2
## [43] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [46] ggplot2_3.5.1 tidyverse_2.0.0 readxl_1.4.3
## [49] openxlsx_4.2.5.2
##
## loaded via a namespace (and not attached):
## [1] splines_4.4.1 later_1.3.2 cellranger_1.1.0
## [4] polyclip_1.10-6 janitor_2.2.0 lifecycle_1.0.4
## [7] rstatix_0.7.2 lattice_0.22-6 MASS_7.3-60.2
## [10] crosstalk_1.2.1 backports_1.4.1 magrittr_2.0.3
## [13] sass_0.4.9 rmarkdown_2.26 jquerylib_0.1.4
## [16] yaml_2.3.8 httpuv_1.6.15 zip_2.3.1
## [19] askpass_1.2.0 DBI_1.2.2 minqa_1.2.6
## [22] abind_1.4-5 zlibbioc_1.50.0 yulab.utils_0.1.4
## [25] WriteXLS_6.5.0 tweenr_2.0.3 gdtools_0.3.7
## [28] GenomeInfoDbData_1.2.12 crul_1.4.2 svglite_2.1.3
## [31] ggforce_0.4.2 xml2_1.3.6 tidyselect_1.2.1
## [34] farver_2.1.2 httpcode_0.3.0 UCSC.utils_1.0.0
## [37] viridis_0.6.5 broom.helpers_1.15.0 jsonlite_1.8.8
## [40] systemfonts_1.0.6 tools_4.4.1 ragg_1.3.1
## [43] Rcpp_1.0.12 glue_1.7.0 xfun_0.43
## [46] GenomeInfoDb_1.40.0 withr_3.0.0 numDeriv_2016.8-1.1
## [49] fastmap_1.1.1 boot_1.3-30 fansi_1.0.6
## [52] openssl_2.1.2 digest_0.6.35 timechange_0.3.0
## [55] R6_2.5.1 mime_0.12 gridGraphics_0.5-1
## [58] textshaping_0.3.7 RSQLite_2.3.6 utf8_1.2.4
## [61] generics_0.1.3 fontLiberation_0.1.0 data.table_1.15.4
## [64] httr_1.4.7 htmlwidgets_1.6.4 pkgconfig_2.0.3
## [67] gtable_0.3.5 blob_1.2.4 XVector_0.44.0
## [70] htmltools_0.5.8.1 fontBitstreamVera_0.1.1 carData_3.0-5
## [73] scales_1.3.0 png_0.1-8 snakecase_0.11.1
## [76] rstudioapi_0.16.0 tzdb_0.4.0 rjson_0.2.21
## [79] uuid_1.2-0 checkmate_2.3.1 nlme_3.1-164
## [82] curl_5.2.1 nloptr_2.0.3 cachem_1.0.8
## [85] vipor_0.4.7 pillar_1.9.0 grid_4.4.1
## [88] vctrs_0.6.5 promises_1.3.0 car_3.1-2
## [91] xtable_1.8-4 beeswarm_0.4.0 evaluate_0.23
## [94] cli_3.6.2 locfit_1.5-9.9 compiler_4.4.1
## [97] rlang_1.1.4 crayon_1.5.2 ggsignif_0.6.4
## [100] labeling_0.4.3 fs_1.6.4 stringi_1.8.4
## [103] viridisLite_0.4.2 munsell_0.5.1 Biostrings_2.72.0
## [106] lazyeval_0.2.2 fontquiver_0.2.1 hms_1.1.3
## [109] bit64_4.0.5 gfonts_0.2.0 KEGGREST_1.44.0
## [112] statmod_1.5.0 shiny_1.8.1.1 highr_0.10
## [115] gt_0.10.1 memoise_2.0.1 bslib_0.7.0
## [118] bit_4.0.5 officer_0.6.6